diff options
author | Andy Belle-Isle <drumsetmonkey@gmail.com> | 2019-08-25 16:39:02 -0400 |
---|---|---|
committer | Andy Belle-Isle <drumsetmonkey@gmail.com> | 2019-08-25 16:39:02 -0400 |
commit | bb283f4581ac07a4dd75325c4343d7f28b1ff376 (patch) | |
tree | f3892f8b77177149ae0294e7241d2ecaf58b64a6 | |
parent | d48436bbf5032484ff7d9c2667a63672e306ca18 (diff) |
Fixed LuaJIT files
-rw-r--r-- | lib/LuaJIT/.gitignore | 11 | ||||
-rw-r--r-- | lib/LuaJIT/COPYRIGHT | 56 | ||||
-rw-r--r-- | lib/LuaJIT/Makefile | 825 | ||||
-rw-r--r-- | lib/LuaJIT/README | 16 | ||||
-rw-r--r-- | lib/LuaJIT/doc/bluequad-print.css | 166 | ||||
-rw-r--r-- | lib/LuaJIT/doc/bluequad.css | 325 | ||||
-rw-r--r-- | lib/LuaJIT/doc/changes.html | 882 | ||||
-rw-r--r-- | lib/LuaJIT/doc/contact.html | 110 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_c_api.html | 188 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_ffi.html | 331 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_ffi_api.html | 571 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_ffi_semantics.html | 1261 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_ffi_tutorial.html | 602 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_jit.html | 200 | ||||
-rw-r--r-- | lib/LuaJIT/doc/ext_profiler.html | 364 | ||||
-rw-r--r-- | lib/LuaJIT/doc/extensions.html | 482 | ||||
-rw-r--r-- | lib/LuaJIT/doc/faq.html | 185 | ||||
-rw-r--r-- | lib/LuaJIT/doc/img/contact.png | bin | 0 -> 1340 bytes | |||
-rw-r--r-- | lib/LuaJIT/doc/install.html | 691 | ||||
-rw-r--r-- | lib/LuaJIT/doc/luajit.html | 235 | ||||
-rw-r--r-- | lib/LuaJIT/doc/running.html | 308 | ||||
-rw-r--r-- | lib/LuaJIT/doc/status.html | 122 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_arm.h | 458 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_arm.lua | 1125 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_arm64.h | 519 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_arm64.lua | 1166 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_mips.h | 420 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_mips.lua | 1008 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_mips64.lua | 12 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_ppc.h | 420 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_ppc.lua | 1919 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_proto.h | 83 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_x64.lua | 12 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_x86.h | 509 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dasm_x86.lua | 2360 | ||||
-rw-r--r-- | lib/LuaJIT/dynasm/dynasm.lua | 1094 | ||||
-rw-r--r-- | lib/LuaJIT/etc/luajit.1 | 88 | ||||
-rw-r--r-- | lib/LuaJIT/etc/luajit.pc | 25 | ||||
-rw-r--r-- | lib/LuaJIT/libluajit.a | bin | 866936 -> 0 bytes | |||
-rwxr-xr-x | lib/LuaJIT/libluajit.so | bin | 503336 -> 0 bytes | |||
-rw-r--r-- | lib/LuaJIT/src/.gitignore | 7 | ||||
-rw-r--r-- | lib/LuaJIT/src/Makefile | 721 | ||||
-rw-r--r-- | lib/LuaJIT/src/Makefile.dep (renamed from lib/LuaJIT/Makefile.dep) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/.gitignore (renamed from lib/LuaJIT/host/.gitignore) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/README (renamed from lib/LuaJIT/host/README) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm.c (renamed from lib/LuaJIT/host/buildvm.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm.h (renamed from lib/LuaJIT/host/buildvm.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm_asm.c (renamed from lib/LuaJIT/host/buildvm_asm.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm_fold.c (renamed from lib/LuaJIT/host/buildvm_fold.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm_lib.c (renamed from lib/LuaJIT/host/buildvm_lib.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm_libbc.h (renamed from lib/LuaJIT/host/buildvm_libbc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/buildvm_peobj.c (renamed from lib/LuaJIT/host/buildvm_peobj.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/genlibbc.lua (renamed from lib/LuaJIT/host/genlibbc.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/genminilua.lua (renamed from lib/LuaJIT/host/genminilua.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/host/minilua.c (renamed from lib/LuaJIT/host/minilua.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/.gitignore (renamed from lib/LuaJIT/jit/.gitignore) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/bc.lua (renamed from lib/LuaJIT/jit/bc.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/bcsave.lua (renamed from lib/LuaJIT/jit/bcsave.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_arm.lua (renamed from lib/LuaJIT/jit/dis_arm.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_arm64.lua (renamed from lib/LuaJIT/jit/dis_arm64.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_arm64be.lua (renamed from lib/LuaJIT/jit/dis_arm64be.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_mips.lua (renamed from lib/LuaJIT/jit/dis_mips.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_mips64.lua (renamed from lib/LuaJIT/jit/dis_mips64.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_mips64el.lua (renamed from lib/LuaJIT/jit/dis_mips64el.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_mipsel.lua (renamed from lib/LuaJIT/jit/dis_mipsel.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_ppc.lua (renamed from lib/LuaJIT/jit/dis_ppc.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_x64.lua (renamed from lib/LuaJIT/jit/dis_x64.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dis_x86.lua (renamed from lib/LuaJIT/jit/dis_x86.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/dump.lua (renamed from lib/LuaJIT/jit/dump.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/p.lua (renamed from lib/LuaJIT/jit/p.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/v.lua (renamed from lib/LuaJIT/jit/v.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/jit/zone.lua (renamed from lib/LuaJIT/jit/zone.lua) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lauxlib.h (renamed from lib/LuaJIT/lauxlib.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_aux.c (renamed from lib/LuaJIT/lib_aux.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_base.c (renamed from lib/LuaJIT/lib_base.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_bit.c (renamed from lib/LuaJIT/lib_bit.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_debug.c (renamed from lib/LuaJIT/lib_debug.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_ffi.c (renamed from lib/LuaJIT/lib_ffi.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_init.c (renamed from lib/LuaJIT/lib_init.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_io.c (renamed from lib/LuaJIT/lib_io.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_jit.c (renamed from lib/LuaJIT/lib_jit.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_math.c (renamed from lib/LuaJIT/lib_math.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_os.c (renamed from lib/LuaJIT/lib_os.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_package.c (renamed from lib/LuaJIT/lib_package.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_string.c (renamed from lib/LuaJIT/lib_string.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lib_table.c (renamed from lib/LuaJIT/lib_table.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj.supp (renamed from lib/LuaJIT/lj.supp) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_alloc.c (renamed from lib/LuaJIT/lj_alloc.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_alloc.h (renamed from lib/LuaJIT/lj_alloc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_api.c (renamed from lib/LuaJIT/lj_api.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_arch.h (renamed from lib/LuaJIT/lj_arch.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm.c (renamed from lib/LuaJIT/lj_asm.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm.h (renamed from lib/LuaJIT/lj_asm.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm_arm.h (renamed from lib/LuaJIT/lj_asm_arm.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm_arm64.h (renamed from lib/LuaJIT/lj_asm_arm64.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm_mips.h (renamed from lib/LuaJIT/lj_asm_mips.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm_ppc.h (renamed from lib/LuaJIT/lj_asm_ppc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_asm_x86.h (renamed from lib/LuaJIT/lj_asm_x86.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_bc.c (renamed from lib/LuaJIT/lj_bc.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_bc.h (renamed from lib/LuaJIT/lj_bc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_bcdump.h (renamed from lib/LuaJIT/lj_bcdump.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_bcread.c (renamed from lib/LuaJIT/lj_bcread.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_bcwrite.c (renamed from lib/LuaJIT/lj_bcwrite.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_buf.c (renamed from lib/LuaJIT/lj_buf.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_buf.h (renamed from lib/LuaJIT/lj_buf.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_carith.c (renamed from lib/LuaJIT/lj_carith.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_carith.h (renamed from lib/LuaJIT/lj_carith.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ccall.c (renamed from lib/LuaJIT/lj_ccall.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ccall.h (renamed from lib/LuaJIT/lj_ccall.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ccallback.c (renamed from lib/LuaJIT/lj_ccallback.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ccallback.h (renamed from lib/LuaJIT/lj_ccallback.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_cconv.c (renamed from lib/LuaJIT/lj_cconv.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_cconv.h (renamed from lib/LuaJIT/lj_cconv.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_cdata.c (renamed from lib/LuaJIT/lj_cdata.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_cdata.h (renamed from lib/LuaJIT/lj_cdata.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_char.c (renamed from lib/LuaJIT/lj_char.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_char.h (renamed from lib/LuaJIT/lj_char.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_clib.c (renamed from lib/LuaJIT/lj_clib.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_clib.h (renamed from lib/LuaJIT/lj_clib.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_cparse.c (renamed from lib/LuaJIT/lj_cparse.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_cparse.h (renamed from lib/LuaJIT/lj_cparse.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_crecord.c (renamed from lib/LuaJIT/lj_crecord.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_crecord.h (renamed from lib/LuaJIT/lj_crecord.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ctype.c (renamed from lib/LuaJIT/lj_ctype.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ctype.h (renamed from lib/LuaJIT/lj_ctype.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_debug.c (renamed from lib/LuaJIT/lj_debug.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_debug.h (renamed from lib/LuaJIT/lj_debug.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_def.h (renamed from lib/LuaJIT/lj_def.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_dispatch.c (renamed from lib/LuaJIT/lj_dispatch.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_dispatch.h (renamed from lib/LuaJIT/lj_dispatch.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_emit_arm.h (renamed from lib/LuaJIT/lj_emit_arm.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_emit_arm64.h (renamed from lib/LuaJIT/lj_emit_arm64.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_emit_mips.h (renamed from lib/LuaJIT/lj_emit_mips.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_emit_ppc.h (renamed from lib/LuaJIT/lj_emit_ppc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_emit_x86.h (renamed from lib/LuaJIT/lj_emit_x86.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_err.c (renamed from lib/LuaJIT/lj_err.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_err.h (renamed from lib/LuaJIT/lj_err.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_errmsg.h (renamed from lib/LuaJIT/lj_errmsg.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ff.h (renamed from lib/LuaJIT/lj_ff.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ffrecord.c (renamed from lib/LuaJIT/lj_ffrecord.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ffrecord.h (renamed from lib/LuaJIT/lj_ffrecord.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_frame.h (renamed from lib/LuaJIT/lj_frame.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_func.c (renamed from lib/LuaJIT/lj_func.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_func.h (renamed from lib/LuaJIT/lj_func.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_gc.c (renamed from lib/LuaJIT/lj_gc.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_gc.h (renamed from lib/LuaJIT/lj_gc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_gdbjit.c (renamed from lib/LuaJIT/lj_gdbjit.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_gdbjit.h (renamed from lib/LuaJIT/lj_gdbjit.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ir.c (renamed from lib/LuaJIT/lj_ir.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ir.h (renamed from lib/LuaJIT/lj_ir.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_ircall.h (renamed from lib/LuaJIT/lj_ircall.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_iropt.h (renamed from lib/LuaJIT/lj_iropt.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_jit.h (renamed from lib/LuaJIT/lj_jit.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_lex.c (renamed from lib/LuaJIT/lj_lex.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_lex.h (renamed from lib/LuaJIT/lj_lex.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_lib.c (renamed from lib/LuaJIT/lj_lib.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_lib.h (renamed from lib/LuaJIT/lj_lib.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_load.c (renamed from lib/LuaJIT/lj_load.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_mcode.c (renamed from lib/LuaJIT/lj_mcode.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_mcode.h (renamed from lib/LuaJIT/lj_mcode.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_meta.c (renamed from lib/LuaJIT/lj_meta.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_meta.h (renamed from lib/LuaJIT/lj_meta.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_obj.c (renamed from lib/LuaJIT/lj_obj.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_obj.h (renamed from lib/LuaJIT/lj_obj.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_dce.c (renamed from lib/LuaJIT/lj_opt_dce.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_fold.c (renamed from lib/LuaJIT/lj_opt_fold.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_loop.c (renamed from lib/LuaJIT/lj_opt_loop.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_mem.c (renamed from lib/LuaJIT/lj_opt_mem.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_narrow.c (renamed from lib/LuaJIT/lj_opt_narrow.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_sink.c (renamed from lib/LuaJIT/lj_opt_sink.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_opt_split.c (renamed from lib/LuaJIT/lj_opt_split.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_parse.c (renamed from lib/LuaJIT/lj_parse.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_parse.h (renamed from lib/LuaJIT/lj_parse.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_profile.c (renamed from lib/LuaJIT/lj_profile.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_profile.h (renamed from lib/LuaJIT/lj_profile.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_record.c (renamed from lib/LuaJIT/lj_record.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_record.h (renamed from lib/LuaJIT/lj_record.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_snap.c (renamed from lib/LuaJIT/lj_snap.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_snap.h (renamed from lib/LuaJIT/lj_snap.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_state.c (renamed from lib/LuaJIT/lj_state.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_state.h (renamed from lib/LuaJIT/lj_state.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_str.c (renamed from lib/LuaJIT/lj_str.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_str.h (renamed from lib/LuaJIT/lj_str.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_strfmt.c (renamed from lib/LuaJIT/lj_strfmt.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_strfmt.h (renamed from lib/LuaJIT/lj_strfmt.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_strfmt_num.c (renamed from lib/LuaJIT/lj_strfmt_num.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_strscan.c (renamed from lib/LuaJIT/lj_strscan.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_strscan.h (renamed from lib/LuaJIT/lj_strscan.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_tab.c (renamed from lib/LuaJIT/lj_tab.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_tab.h (renamed from lib/LuaJIT/lj_tab.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_target.h (renamed from lib/LuaJIT/lj_target.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_target_arm.h (renamed from lib/LuaJIT/lj_target_arm.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_target_arm64.h (renamed from lib/LuaJIT/lj_target_arm64.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_target_mips.h (renamed from lib/LuaJIT/lj_target_mips.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_target_ppc.h (renamed from lib/LuaJIT/lj_target_ppc.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_target_x86.h (renamed from lib/LuaJIT/lj_target_x86.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_trace.c (renamed from lib/LuaJIT/lj_trace.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_trace.h (renamed from lib/LuaJIT/lj_trace.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_traceerr.h (renamed from lib/LuaJIT/lj_traceerr.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_udata.c (renamed from lib/LuaJIT/lj_udata.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_udata.h (renamed from lib/LuaJIT/lj_udata.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_vm.h (renamed from lib/LuaJIT/lj_vm.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_vmevent.c (renamed from lib/LuaJIT/lj_vmevent.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_vmevent.h (renamed from lib/LuaJIT/lj_vmevent.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lj_vmmath.c (renamed from lib/LuaJIT/lj_vmmath.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/ljamalg.c (renamed from lib/LuaJIT/ljamalg.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lua.h (renamed from lib/LuaJIT/lua.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lua.hpp (renamed from lib/LuaJIT/lua.hpp) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/luaconf.h (renamed from lib/LuaJIT/luaconf.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/luajit.c (renamed from lib/LuaJIT/luajit.c) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/luajit.h (renamed from lib/LuaJIT/luajit.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/lualib.h (renamed from lib/LuaJIT/lualib.h) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/msvcbuild.bat (renamed from lib/LuaJIT/msvcbuild.bat) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/ps4build.bat (renamed from lib/LuaJIT/ps4build.bat) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/psvitabuild.bat (renamed from lib/LuaJIT/psvitabuild.bat) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_arm.dasc (renamed from lib/LuaJIT/vm_arm.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_arm64.dasc (renamed from lib/LuaJIT/vm_arm64.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_mips.dasc (renamed from lib/LuaJIT/vm_mips.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_mips64.dasc (renamed from lib/LuaJIT/vm_mips64.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_ppc.dasc (renamed from lib/LuaJIT/vm_ppc.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_x64.dasc (renamed from lib/LuaJIT/vm_x64.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/vm_x86.dasc (renamed from lib/LuaJIT/vm_x86.dasc) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/xb1build.bat (renamed from lib/LuaJIT/xb1build.bat) | 0 | ||||
-rw-r--r-- | lib/LuaJIT/src/xedkbuild.bat (renamed from lib/LuaJIT/xedkbuild.bat) | 0 | ||||
-rw-r--r-- | lib/libentityx.a | bin | 390786 -> 414808 bytes |
225 files changed, 19187 insertions, 690 deletions
diff --git a/lib/LuaJIT/.gitignore b/lib/LuaJIT/.gitignore new file mode 100644 index 0000000..1a07bf7 --- /dev/null +++ b/lib/LuaJIT/.gitignore @@ -0,0 +1,11 @@ +*.[oa] +*.so +*.obj +*.lib +*.exp +*.dll +*.exe +*.manifest +*.dmp +*.swp +.tags diff --git a/lib/LuaJIT/COPYRIGHT b/lib/LuaJIT/COPYRIGHT new file mode 100644 index 0000000..6ed4002 --- /dev/null +++ b/lib/LuaJIT/COPYRIGHT @@ -0,0 +1,56 @@ +=============================================================================== +LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ + +Copyright (C) 2005-2017 Mike Pall. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +[ MIT license: http://www.opensource.org/licenses/mit-license.php ] + +=============================================================================== +[ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ] + +Copyright (C) 1994-2012 Lua.org, PUC-Rio. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=============================================================================== +[ LuaJIT includes code from dlmalloc, which has this license statement: ] + +This is a version (aka dlmalloc) of malloc/free/realloc written by +Doug Lea and released to the public domain, as explained at +http://creativecommons.org/licenses/publicdomain + +=============================================================================== diff --git a/lib/LuaJIT/Makefile b/lib/LuaJIT/Makefile index d22eb73..0f93308 100644 --- a/lib/LuaJIT/Makefile +++ b/lib/LuaJIT/Makefile @@ -1,11 +1,14 @@ ############################################################################## -# LuaJIT Makefile. Requires GNU Make. +# LuaJIT top level Makefile for installation. Requires GNU Make. # # Please read doc/install.html before changing any variables! # # Suitable for POSIX platforms (Linux, *BSD, OSX etc.). -# Also works with MinGW and Cygwin on Windows. 
-# Please check msvcbuild.bat for building with MSVC on Windows. +# Note: src/Makefile has many more configurable options. +# +# ##### This Makefile is NOT useful for Windows! ##### +# For MSVC, please follow the instructions given in src/msvcbuild.bat. +# For MinGW and Cygwin, cd to src and run make with the Makefile there. # # Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ############################################################################## @@ -13,709 +16,151 @@ MAJVER= 2 MINVER= 1 RELVER= 0 +PREREL= -beta3 +VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) ABIVER= 5.1 -NODOTABIVER= 51 - -############################################################################## -############################# COMPILER OPTIONS ############################# -############################################################################## -# These options mainly affect the speed of the JIT compiler itself, not the -# speed of the JIT-compiled code. Turn any of the optional settings on by -# removing the '#' in front of them. Make sure you force a full recompile -# with "make clean", followed by "make" if you change any options. -# -DEFAULT_CC = gcc -# -# LuaJIT builds as a native 32 or 64 bit binary by default. -CC= $(DEFAULT_CC) -# -# Use this if you want to force a 32 bit build on a 64 bit multilib OS. -#CC= $(DEFAULT_CC) -m32 -# -# Since the assembler part does NOT maintain a frame pointer, it's pointless -# to slow down the C part by not omitting it. Debugging, tracebacks and -# unwinding are not affected -- the assembler part has frame unwind -# information and GCC emits it where needed (x64) or with -g (see CCDEBUG). -CCOPT= -O2 -fomit-frame-pointer -# Use this if you want to generate a smaller binary (but it's slower): -#CCOPT= -Os -fomit-frame-pointer -# Note: it's no longer recommended to use -O3 with GCC 4.x. -# The I-Cache bloat usually outweighs the benefits from aggressive inlining. 
-# -# Target-specific compiler options: -# -# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute -# the binaries to a different machine you could also use: -march=native -# -CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse -CCOPT_x64= -CCOPT_arm= -CCOPT_arm64= -CCOPT_ppc= -CCOPT_mips= -# -CCDEBUG= -# Uncomment the next line to generate debug information: -#CCDEBUG= -g -# -CCWARN= -Wall -# Uncomment the next line to enable more warnings: -#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith -# -############################################################################## - -############################################################################## -################################ BUILD MODE ################################ -############################################################################## -# The default build mode is mixed mode on POSIX. On Windows this is the same -# as dynamic mode. -# -# Mixed mode creates a static + dynamic library and a statically linked luajit. -BUILDMODE= mixed -# -# Static mode creates a static library and a statically linked luajit. -#BUILDMODE= static -# -# Dynamic mode creates a dynamic library and a dynamically linked luajit. -# Note: this executable will only run when the library is installed! -#BUILDMODE= dynamic -# -############################################################################## ############################################################################## -################################# FEATURES ################################# -############################################################################## -# Enable/disable these features as needed, but make sure you force a full -# recompile with "make clean", followed by "make". -XCFLAGS= # -# Permanently disable the FFI extension to reduce the size of the LuaJIT -# executable. But please consider that the FFI library is compiled-in, -# but NOT loaded by default. 
It only allocates any memory, if you actually -# make use of it. -#XCFLAGS+= -DLUAJIT_DISABLE_FFI -# -# Features from Lua 5.2 that are unlikely to break existing code are -# enabled by default. Some other features that *might* break some existing -# code (e.g. __pairs or os.execute() return values) can be enabled here. -# Note: this does not provide full compatibility with Lua 5.2 at this time. -#XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT -# -# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. -#XCFLAGS+= -DLUAJIT_DISABLE_JIT -# -# Some architectures (e.g. PPC) can use either single-number (1) or -# dual-number (2) mode. Uncomment one of these lines to override the -# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. -#XCFLAGS+= -DLUAJIT_NUMMODE=1 -#XCFLAGS+= -DLUAJIT_NUMMODE=2 -# -# Enable GC64 mode for x64. -#XCFLAGS+= -DLUAJIT_ENABLE_GC64 -# -############################################################################## - -############################################################################## -############################ DEBUGGING SUPPORT ############################# -############################################################################## -# Enable these options as needed, but make sure you force a full recompile -# with "make clean", followed by "make". -# Note that most of these are NOT suitable for benchmarking or release mode! -# -# Use the system provided memory allocator (realloc) instead of the -# bundled memory allocator. This is slower, but sometimes helpful for -# debugging. This option cannot be enabled on x64 without GC64, since -# realloc usually doesn't return addresses in the right address range. -# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and -# the only way to get useful results from it for all other architectures. -#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC -# -# This define is required to run LuaJIT under Valgrind. The Valgrind -# header files must be installed. 
You should enable debug information, too. -# Use --suppressions=lj.supp to avoid some false positives. -#XCFLAGS+= -DLUAJIT_USE_VALGRIND -# -# This is the client for the GDB JIT API. GDB 7.0 or higher is required -# to make use of it. See lj_gdbjit.c for details. Enabling this causes -# a non-negligible overhead, even when not running under GDB. -#XCFLAGS+= -DLUAJIT_USE_GDBJIT -# -# Turn on assertions for the Lua/C API to debug problems with lua_* calls. -# This is rather slow -- use only while developing C libraries/embeddings. -#XCFLAGS+= -DLUA_USE_APICHECK -# -# Turn on assertions for the whole LuaJIT VM. This significantly slows down -# everything. Use only if you suspect a problem with LuaJIT itself. -#XCFLAGS+= -DLUA_USE_ASSERT -# -############################################################################## -# You probably don't need to change anything below this line! -############################################################################## - -############################################################################## -# Host system detection. -############################################################################## - -ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) - HOST_SYS= Windows - HOST_RM= del -else +# Change the installation path as needed. This automatically adjusts +# the paths in src/luaconf.h, too. Note: PREFIX must be an absolute path! 
+# +export PREFIX= /usr/local +export MULTILIB= lib +############################################################################## + +DPREFIX= $(DESTDIR)$(PREFIX) +INSTALL_BIN= $(DPREFIX)/bin +INSTALL_LIB= $(DPREFIX)/$(MULTILIB) +INSTALL_SHARE= $(DPREFIX)/share +INSTALL_INC= $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER) + +INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION) +INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit +INSTALL_LMODD= $(INSTALL_SHARE)/lua +INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) +INSTALL_CMODD= $(INSTALL_LIB)/lua +INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER) +INSTALL_MAN= $(INSTALL_SHARE)/man/man1 +INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig + +INSTALL_TNAME= luajit-$(VERSION) +INSTALL_TSYMNAME= luajit +INSTALL_ANAME= libluajit-$(ABIVER).a +INSTALL_SOSHORT1= libluajit-$(ABIVER).so +INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER) +INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib +INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib +INSTALL_PCNAME= luajit.pc + +INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) +INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) +INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1) +INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) +INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) +INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) +INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) + +INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ + $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) +UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \ + $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) + +RM= rm -f +MKDIR= mkdir -p +RMDIR= rmdir 2>/dev/null +SYMLINK= ln -sf +INSTALL_X= install -m 0755 +INSTALL_F= install -m 0644 +UNINSTALL= $(RM) +LDCONFIG= ldconfig -n +SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ + -e 
"s|^multilib=.*|multilib=$(MULTILIB)|" + +FILE_T= luajit +FILE_A= libluajit.a +FILE_SO= libluajit.so +FILE_MAN= luajit.1 +FILE_PC= luajit.pc +FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h +FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua vmdef.lua + +ifeq (,$(findstring Windows,$(OS))) HOST_SYS:= $(shell uname -s) - ifneq (,$(findstring MINGW,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= mingw - endif - ifneq (,$(findstring MSYS,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= mingw - endif - ifneq (,$(findstring CYGWIN,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= cygwin - endif -endif - -############################################################################## -# Flags and options for host and target. -############################################################################## - -# You can override the following variables at the make command line: -# CC HOST_CC STATIC_CC DYNAMIC_CC -# CFLAGS HOST_CFLAGS TARGET_CFLAGS -# LDFLAGS HOST_LDFLAGS TARGET_LDFLAGS TARGET_SHLDFLAGS -# LIBS HOST_LIBS TARGET_LIBS -# CROSS HOST_SYS TARGET_SYS TARGET_FLAGS -# -# Cross-compilation examples: -# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows -# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- - -ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) -CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) -LDOPTIONS= $(CCDEBUG) $(LDFLAGS) - -HOST_CC= $(CC) -HOST_RM?= rm -f -# If left blank, minilua is built and used. You can supply an installed -# copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua -HOST_LUA= - -HOST_XCFLAGS= -I. 
-HOST_XLDFLAGS= -HOST_XLIBS= -HOST_ACFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) $(HOST_CFLAGS) -HOST_ALDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) $(HOST_LDFLAGS) -HOST_ALIBS= $(HOST_XLIBS) $(LIBS) $(HOST_LIBS) - -STATIC_CC = $(CROSS)$(CC) -DYNAMIC_CC = $(CROSS)$(CC) -fPIC -TARGET_CC= $(STATIC_CC) -TARGET_STCC= $(STATIC_CC) -TARGET_DYNCC= $(DYNAMIC_CC) -TARGET_LD= $(CROSS)$(CC) -TARGET_AR= $(CROSS)ar rcus -TARGET_STRIP= $(CROSS)strip - -TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) -TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) -TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib -TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) -TARGET_DLLNAME= lua$(NODOTABIVER).dll -TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) -TARGET_DYNXLDOPTS= - -TARGET_LFSFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -TARGET_XCFLAGS= $(TARGET_LFSFLAGS) -U_FORTIFY_SOURCE -TARGET_XLDFLAGS= -TARGET_XLIBS= -lm -TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) -TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) -TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) -TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) -TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) -TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) - -TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) -ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) - TARGET_LJARCH= x64 -else -ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH))) - TARGET_LJARCH= x86 -else -ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) - TARGET_LJARCH= arm -else -ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) - ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) - TARGET_ARCH= -D__AARCH64EB__=1 - endif - TARGET_LJARCH= arm64 -else -ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) - 
ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) - TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE - else - TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE - endif - TARGET_LJARCH= ppc -else -ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) - ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) - TARGET_ARCH= -D__MIPSEL__=1 - endif - ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) - TARGET_LJARCH= mips64 - else - TARGET_LJARCH= mips - endif else - $(error Unsupported target architecture) -endif -endif -endif -endif -endif -endif - -ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) - TARGET_SYS= PS3 - TARGET_ARCH+= -D__CELLOS_LV2__ - TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC - TARGET_XLIBS+= -lpthread -endif - -TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) -TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH)) - -ifneq (,$(PREFIX)) -ifneq (/usr/local,$(PREFIX)) - TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\" - ifneq (/usr,$(PREFIX)) - TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH) - endif -endif -endif -ifneq (,$(MULTILIB)) - TARGET_XCFLAGS+= -DLUA_MULTILIB=\"$(MULTILIB)\" -endif -ifneq (,$(LMULTILIB)) - TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" + HOST_SYS= Windows endif - -############################################################################## -# Target system detection. 
-############################################################################## - TARGET_SYS?= $(HOST_SYS) -ifeq (Windows,$(TARGET_SYS)) - TARGET_STRIP+= --strip-unneeded - TARGET_XSHLDFLAGS= -shared - TARGET_DYNXLDOPTS= -else - TARGET_AR+= 2>/dev/null -ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) - TARGET_XCFLAGS+= -fno-stack-protector -endif -ifeq (Darwin,$(TARGET_SYS)) - ifeq (,$(MACOSX_DEPLOYMENT_TARGET)) - export MACOSX_DEPLOYMENT_TARGET=10.4 - endif - TARGET_STRIP+= -x - TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC - TARGET_DYNXLDOPTS= - TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) - ifeq (x64,$(TARGET_LJARCH)) - TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 - TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 - endif -else -ifeq (iOS,$(TARGET_SYS)) - TARGET_STRIP+= -x - TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC - TARGET_DYNXLDOPTS= - TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) - ifeq (arm64,$(TARGET_LJARCH)) - TARGET_XCFLAGS+= -fno-omit-frame-pointer - endif -else - ifneq (SunOS,$(TARGET_SYS)) - ifneq (PS3,$(TARGET_SYS)) - TARGET_XLDFLAGS+= -Wl,-E - endif - endif - ifeq (Linux,$(TARGET_SYS)) - TARGET_XLIBS+= -ldl - endif - ifeq (GNU/kFreeBSD,$(TARGET_SYS)) - TARGET_XLIBS+= -ldl - endif -endif -endif -endif - -ifneq ($(HOST_SYS),$(TARGET_SYS)) - ifeq (Windows,$(TARGET_SYS)) - HOST_XCFLAGS+= -malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS - else - ifeq (Linux,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX - else - ifeq (Darwin,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX - else - ifeq (iOS,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX - else - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER - endif - 
endif - endif - endif -endif -ifneq (,$(CCDEBUG)) - TARGET_STRIP= @: -endif - -############################################################################## -# Files and pathnames. -############################################################################## - -MINILUA_O= host/minilua.o -MINILUA_LIBS= -lm -MINILUA_T= host/minilua -MINILUA_X= $(MINILUA_T) - -ifeq (,$(HOST_LUA)) - HOST_LUA= $(MINILUA_X) - DASM_DEP= $(MINILUA_T) -endif - -DASM_DIR= ../dynasm -DASM= $(HOST_LUA) $(DASM_DIR)/dynasm.lua -DASM_XFLAGS= -DASM_AFLAGS= -DASM_ARCH= $(TARGET_LJARCH) - -ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D ENDIAN_LE -else - DASM_AFLAGS+= -D ENDIAN_BE -endif -ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D P64 -endif -ifneq (,$(findstring LJ_HASJIT 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D JIT -endif -ifneq (,$(findstring LJ_HASFFI 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D FFI -endif -ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D DUALNUM -endif -ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D FPU - TARGET_ARCH+= -DLJ_ARCH_HASFPU=1 -else - TARGET_ARCH+= -DLJ_ARCH_HASFPU=0 -endif -ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D HFABI - TARGET_ARCH+= -DLJ_ABI_SOFTFP=0 -else - TARGET_ARCH+= -DLJ_ABI_SOFTFP=1 -endif -ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D NO_UNWIND - TARGET_ARCH+= -DLUAJIT_NO_UNWIND -endif -DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) -ifeq (Windows,$(TARGET_SYS)) - DASM_AFLAGS+= -D WIN -endif -ifeq (x64,$(TARGET_LJARCH)) - ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) - DASM_ARCH= x86 - endif -else -ifeq (arm,$(TARGET_LJARCH)) - ifeq (iOS,$(TARGET_SYS)) - DASM_AFLAGS+= -D IOS - endif -else -ifeq (ppc,$(TARGET_LJARCH)) - ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) - 
DASM_AFLAGS+= -D SQRT - endif - ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D ROUND - endif - ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D GPR64 - endif - ifeq (PS3,$(TARGET_SYS)) - DASM_AFLAGS+= -D PPE -D TOC - endif - ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH))) - DASM_ARCH= ppc64 - endif -endif -endif -endif - -DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) -DASM_DASC= vm_$(DASM_ARCH).dasc - -BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ - host/buildvm_lib.o host/buildvm_fold.o -BUILDVM_T= host/buildvm -BUILDVM_X= $(BUILDVM_T) - -HOST_O= $(MINILUA_O) $(BUILDVM_O) -HOST_T= $(MINILUA_T) $(BUILDVM_T) - -LJVM_S= lj_vm.S -LJVM_O= lj_vm.o -LJVM_BOUT= $(LJVM_S) -LJVM_MODE= elfasm - -LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ - lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o -LJLIB_C= $(LJLIB_O:.o=.c) - -LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ - lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ - lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ - lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ - lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ - lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ - lj_asm.o lj_trace.o lj_gdbjit.o \ - lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ - lj_carith.o lj_clib.o lj_cparse.o \ - lj_lib.o lj_alloc.o lib_aux.o \ - $(LJLIB_O) lib_init.o - -LJVMCORE_O= $(LJVM_O) $(LJCORE_O) -LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) - -LIB_VMDEF= jit/vmdef.lua -LIB_VMDEFP= $(LIB_VMDEF) - -LUAJIT_O= luajit.o -LUAJIT_A= libluajit.a -LUAJIT_SO= libluajit.so -LUAJIT_T= luajit - -ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) -ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ - 
host/buildvm_arch.h -ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) -WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk -ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) - -############################################################################## -# Build mode handling. -############################################################################## - -# Mixed mode defaults. -TARGET_O= $(LUAJIT_A) -TARGET_T= $(LUAJIT_T) $(LUAJIT_SO) -TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO) - -ifeq (Windows,$(TARGET_SYS)) - TARGET_DYNCC= $(STATIC_CC) - LJVM_MODE= peobj - LJVM_BOUT= $(LJVM_O) - LUAJIT_T= luajit.exe - ifeq (cygwin,$(HOST_MSYS)) - LUAJIT_SO= cyg$(TARGET_DLLNAME) - else - LUAJIT_SO= $(TARGET_DLLNAME) - endif - # Mixed mode is not supported on Windows. And static mode doesn't work well. - # C modules cannot be loaded, because they bind to lua51.dll. - ifneq (static,$(BUILDMODE)) - BUILDMODE= dynamic - TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL - endif -endif ifeq (Darwin,$(TARGET_SYS)) - LJVM_MODE= machasm -endif -ifeq (iOS,$(TARGET_SYS)) - LJVM_MODE= machasm -endif -ifeq (SunOS,$(TARGET_SYS)) - BUILDMODE= static -endif -ifeq (PS3,$(TARGET_SYS)) - BUILDMODE= static -endif - -ifeq (Windows,$(HOST_SYS)) - MINILUA_T= host/minilua.exe - BUILDVM_T= host/buildvm.exe - ifeq (,$(HOST_MSYS)) - MINILUA_X= host\minilua - BUILDVM_X= host\buildvm - ALL_RM:= $(subst /,\,$(ALL_RM)) - endif -endif - -ifeq (static,$(BUILDMODE)) - TARGET_DYNCC= @: - TARGET_T= $(LUAJIT_T) - TARGET_DEP= $(LIB_VMDEF) -else -ifeq (dynamic,$(BUILDMODE)) - ifneq (Windows,$(TARGET_SYS)) - TARGET_CC= $(DYNAMIC_CC) - endif - TARGET_DYNCC= @: - LJVMCORE_DYNO= $(LJVMCORE_O) - TARGET_O= $(LUAJIT_SO) - TARGET_XLDFLAGS+= $(TARGET_DYNXLDOPTS) -else -ifeq (Darwin,$(TARGET_SYS)) - TARGET_DYNCC= @: - LJVMCORE_DYNO= $(LJVMCORE_O) -endif -ifeq (iOS,$(TARGET_SYS)) - TARGET_DYNCC= @: - LJVMCORE_DYNO= $(LJVMCORE_O) -endif -endif -endif - -Q= @ -E= @echo -#Q= -#E= @: - 
-############################################################################## -# Make targets. -############################################################################## - -default all: $(TARGET_T) - -amalg: - @grep "^[+|]" ljamalg.c - $(MAKE) all "LJCORE_O=ljamalg.o" - -clean: - $(HOST_RM) $(ALL_RM) - -libbc: - ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) - $(MAKE) all - -depend: - @for file in $(ALL_HDRGEN); do \ - test -f $$file || touch $$file; \ + INSTALL_SONAME= $(INSTALL_DYLIBNAME) + INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1) + INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2) + LDCONFIG= : +endif + +############################################################################## + +INSTALL_DEP= src/luajit + +default all $(INSTALL_DEP): + @echo "==== Building LuaJIT $(VERSION) ====" + $(MAKE) -C src + @echo "==== Successfully built LuaJIT $(VERSION) ====" + +install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + $(MKDIR) $(INSTALL_DIRS) + cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) + cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + cd src && test -f $(FILE_SO) && \ + $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ + $(LDCONFIG) $(INSTALL_LIB) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : + cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) + cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \ + $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \ + $(RM) $(FILE_PC).tmp + cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + @echo "" + @echo "Note: the development releases deliberately do NOT install a symlink for luajit" + @echo "You can do this now by running this command (with sudo):" + @echo "" + @echo " $(SYMLINK) 
$(INSTALL_TNAME) $(INSTALL_TSYM)" + @echo "" + + +uninstall: + @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" + $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + for file in $(FILES_JITLIB); do \ + $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ done - @$(HOST_CC) $(HOST_ACFLAGS) -MM *.c host/*.c | \ - sed -e "s| [^ ]*/dasm_\S*\.h||g" \ - -e "s|^\([^l ]\)|host/\1|" \ - -e "s| lj_target_\S*\.h| lj_target_*.h|g" \ - -e "s| lj_emit_\S*\.h| lj_emit_*.h|g" \ - -e "s| lj_asm_\S*\.h| lj_asm_*.h|g" >Makefile.dep - @for file in $(ALL_HDRGEN); do \ - test -s $$file || $(HOST_RM) $$file; \ + for file in $(FILES_INC); do \ + $(UNINSTALL) $(INSTALL_INC)/$$file; \ done - -.PHONY: default all amalg clean libbc depend - -############################################################################## -# Rules for generated files. -############################################################################## - -$(MINILUA_T): $(MINILUA_O) - $(E) "HOSTLINK $@" - $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) - -host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua - $(E) "DYNASM $@" - $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) - -host/buildvm.o: $(DASM_DIR)/dasm_*.h - -$(BUILDVM_T): $(BUILDVM_O) - $(E) "HOSTLINK $@" - $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(BUILDVM_O) $(HOST_ALIBS) - -$(LJVM_BOUT): $(BUILDVM_T) - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m $(LJVM_MODE) -o $@ - -lj_bcdef.h: $(BUILDVM_T) $(LJLIB_C) - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m bcdef -o $@ $(LJLIB_C) - -lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C) - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m ffdef -o $@ $(LJLIB_C) - -lj_libdef.h: $(BUILDVM_T) $(LJLIB_C) - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m libdef -o $@ $(LJLIB_C) - -lj_recdef.h: $(BUILDVM_T) $(LJLIB_C) - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m recdef -o $@ $(LJLIB_C) - -$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C) - $(E) "BUILDVM $@" 
- $(Q)$(BUILDVM_X) -m vmdef -o $(LIB_VMDEFP) $(LJLIB_C) - -lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m folddef -o $@ lj_opt_fold.c + $(LDCONFIG) $(INSTALL_LIB) + $(RMDIR) $(UNINSTALL_DIRS) || : + @echo "==== Successfully uninstalled LuaJIT $(VERSION) from $(PREFIX) ====" ############################################################################## -# Object file rules. -############################################################################## - -%.o: %.c - $(E) "CC $@" - $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< - $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< - -%.o: %.S - $(E) "ASM $@" - $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $< - $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $< - -$(LUAJIT_O): - $(E) "CC $@" - $(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ $< - -$(HOST_O): %.o: %.c - $(E) "HOSTCC $@" - $(Q)$(HOST_CC) $(HOST_ACFLAGS) -c -o $@ $< - -include Makefile.dep -############################################################################## -# Target file rules. -############################################################################## - -$(LUAJIT_A): $(LJVMCORE_O) - $(E) "AR $@" - $(Q)$(TARGET_AR) $@ $(LJVMCORE_O) +amalg: + @echo "Building LuaJIT $(VERSION)" + $(MAKE) -C src amalg -# The dependency on _O, but linking with _DYNO is intentional. 
-$(LUAJIT_SO): $(LJVMCORE_O) - $(E) "DYNLINK $@" - $(Q)$(TARGET_LD) $(TARGET_ASHLDFLAGS) -o $@ $(LJVMCORE_DYNO) $(TARGET_ALIBS) - $(Q)$(TARGET_STRIP) $@ +clean: + $(MAKE) -C src clean -$(LUAJIT_T): $(TARGET_O) $(LUAJIT_O) $(TARGET_DEP) - $(E) "LINK $@" - $(Q)$(TARGET_LD) $(TARGET_ALDFLAGS) -o $@ $(LUAJIT_O) $(TARGET_O) $(TARGET_ALIBS) - $(Q)$(TARGET_STRIP) $@ - $(E) "OK Successfully built LuaJIT" +.PHONY: all install amalg clean ############################################################################## diff --git a/lib/LuaJIT/README b/lib/LuaJIT/README new file mode 100644 index 0000000..2b9ae9d --- /dev/null +++ b/lib/LuaJIT/README @@ -0,0 +1,16 @@ +README for LuaJIT 2.1.0-beta3 +----------------------------- + +LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. + +Project Homepage: http://luajit.org/ + +LuaJIT is Copyright (C) 2005-2017 Mike Pall. +LuaJIT is free software, released under the MIT license. +See full Copyright Notice in the COPYRIGHT file or in luajit.h. + +Documentation for LuaJIT is available in HTML format. +Please point your favorite browser to: + + doc/luajit.html + diff --git a/lib/LuaJIT/doc/bluequad-print.css b/lib/LuaJIT/doc/bluequad-print.css new file mode 100644 index 0000000..d5a3ea3 --- /dev/null +++ b/lib/LuaJIT/doc/bluequad-print.css @@ -0,0 +1,166 @@ +/* Copyright (C) 2004-2018 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. 
+ */ +body { + font-family: serif; + font-size: 11pt; + margin: 0 3em; + padding: 0; + border: none; +} +a:link, a:visited, a:hover, a:active { + text-decoration: none; + background: transparent; + color: #0000ff; +} +h1, h2, h3 { + font-family: sans-serif; + font-weight: bold; + text-align: left; + margin: 0.5em 0; + padding: 0; +} +h1 { + font-size: 200%; +} +h2 { + font-size: 150%; +} +h3 { + font-size: 125%; +} +p { + margin: 0 0 0.5em 0; + padding: 0; +} +ul, ol { + margin: 0.5em 0; + padding: 0 0 0 2em; +} +ul { + list-style: outside square; +} +ol { + list-style: outside decimal; +} +li { + margin: 0; + padding: 0; +} +dl { + margin: 1em 0; + padding: 1em; + border: 1px solid black; +} +dt { + font-weight: bold; + margin: 0; + padding: 0; +} +dt sup { + float: right; + margin-left: 1em; +} +dd { + margin: 0.5em 0 0 2em; + padding: 0; +} +table { + table-layout: fixed; + width: 100%; + margin: 1em 0; + padding: 0; + border: 1px solid black; + border-spacing: 0; + border-collapse: collapse; +} +tr { + margin: 0; + padding: 0; + border: none; +} +td { + text-align: left; + margin: 0; + padding: 0.2em 0.5em; + border-top: 1px solid black; + border-bottom: 1px solid black; +} +tr.separate td { + border-top: double; +} +tt, pre, code, kbd, samp { + font-family: monospace; + font-size: 75%; +} +kbd { + font-weight: bolder; +} +blockquote, pre { + margin: 1em 2em; + padding: 0; +} +img { + border: none; + vertical-align: baseline; + margin: 0; + padding: 0; +} +img.left { + float: left; + margin: 0.5em 1em 0.5em 0; +} +img.right { + float: right; + margin: 0.5em 0 0.5em 1em; +} +.flush { + clear: both; + visibility: hidden; +} +.hide, .noprint, #nav { + display: none !important; +} +.pagebreak { + page-break-before: always; +} +#site { + text-align: right; + font-family: sans-serif; + font-weight: bold; + margin: 0 1em; + border-bottom: 1pt solid black; +} +#site a { + font-size: 1.2em; +} +#site a:link, #site a:visited { + text-decoration: none; + font-weight: 
bold; + background: transparent; + color: #ffffff; +} +#logo { + color: #ff8000; +} +#head { + clear: both; + margin: 0 1em; +} +#main { + line-height: 1.3; + text-align: justify; + margin: 1em; +} +#foot { + clear: both; + font-size: 80%; + text-align: center; + margin: 0 1.25em; + padding: 0.5em 0 0 0; + border-top: 1pt solid black; + page-break-before: avoid; + page-break-after: avoid; +} diff --git a/lib/LuaJIT/doc/bluequad.css b/lib/LuaJIT/doc/bluequad.css new file mode 100644 index 0000000..cfc889a --- /dev/null +++ b/lib/LuaJIT/doc/bluequad.css @@ -0,0 +1,325 @@ +/* Copyright (C) 2004-2018 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. + */ +/* colorscheme: + * + * site | head #4162bf/white | #6078bf/#e6ecff + * ------+------ ----------------+------------------- + * nav | main #bfcfff | #e6ecff/black + * + * nav: hiback loback #c5d5ff #b9c9f9 + * hiborder loborder #e6ecff #97a7d7 + * link hover #2142bf #ff0000 + * + * link: link visited hover #2142bf #8122bf #ff0000 + * + * main: boxback boxborder #f0f4ff #bfcfff + */ +body { + font-family: Verdana, Arial, Helvetica, sans-serif; + font-size: 10pt; + margin: 0; + padding: 0; + border: none; + background: #e0e0e0; + color: #000000; +} +a:link { + text-decoration: none; + background: transparent; + color: #2142bf; +} +a:visited { + text-decoration: none; + background: transparent; + color: #8122bf; +} +a:hover, a:active { + text-decoration: underline; + background: transparent; + color: #ff0000; +} +h1, h2, h3 { + font-weight: bold; + text-align: left; + margin: 0.5em 0; + padding: 0; + background: transparent; +} +h1 { + font-size: 200%; + line-height: 3em; /* really 6em relative to body, match #site span */ + margin: 0; +} +h2 { + font-size: 150%; + color: #606060; +} +h3 { + font-size: 125%; + color: #404040; +} +p { + max-width: 600px; + margin: 0 0 0.5em 0; + padding: 0; +} +b { + 
color: #404040; +} +ul, ol { + max-width: 600px; + margin: 0.5em 0; + padding: 0 0 0 2em; +} +ul { + list-style: outside square; +} +ol { + list-style: outside decimal; +} +li { + margin: 0; + padding: 0; +} +dl { + max-width: 600px; + margin: 1em 0; + padding: 1em; + border: 1px solid #bfcfff; + background: #f0f4ff; +} +dt { + font-weight: bold; + margin: 0; + padding: 0; +} +dt sup { + float: right; + margin-left: 1em; + color: #808080; +} +dt a:visited { + text-decoration: none; + color: #2142bf; +} +dt a:hover, dt a:active { + text-decoration: none; + color: #ff0000; +} +dd { + margin: 0.5em 0 0 2em; + padding: 0; +} +div.tablewrap { /* for IE *sigh* */ + max-width: 600px; +} +table { + table-layout: fixed; + border-spacing: 0; + border-collapse: collapse; + max-width: 600px; + width: 100%; + margin: 1em 0; + padding: 0; + border: 1px solid #bfcfff; +} +tr { + margin: 0; + padding: 0; + border: none; +} +tr.odd { + background: #f0f4ff; +} +tr.separate td { + border-top: 1px solid #bfcfff; +} +td { + text-align: left; + margin: 0; + padding: 0.2em 0.5em; + border: none; +} +tt, code, kbd, samp { + font-family: Courier New, Courier, monospace; + line-height: 1.2; + font-size: 110%; +} +kbd { + font-weight: bolder; +} +blockquote, pre { + max-width: 600px; + margin: 1em 2em; + padding: 0; +} +pre { + line-height: 1.1; +} +pre.code { + line-height: 1.4; + margin: 0.5em 0 1em 0.5em; + padding: 0.5em 1em; + border: 1px solid #bfcfff; + background: #f0f4ff; +} +pre.mark { + padding-left: 2em; +} +span.codemark { + position:absolute; + left: 16em; + color: #4040c0; +} +span.mark { + color: #4040c0; + font-family: Courier New, Courier, monospace; + line-height: 1.1; +} +img { + border: none; + vertical-align: baseline; + margin: 0; + padding: 0; +} +img.left { + float: left; + margin: 0.5em 1em 0.5em 0; +} +img.right { + float: right; + margin: 0.5em 0 0.5em 1em; +} +.indent { + padding-left: 1em; +} +.flush { + clear: both; + visibility: hidden; +} +.hide, .noscreen { 
+ display: none !important; +} +.ext { + color: #ff8000; +} +.new { + font-size: 6pt; + vertical-align: middle; + background: #ff8000; + color: #ffffff; +} +#site { + clear: both; + float: left; + width: 13em; + text-align: center; + font-weight: bold; + margin: 0; + padding: 0; + background: transparent; + color: #ffffff; +} +#site a { + font-size: 200%; +} +#site a:link, #site a:visited { + text-decoration: none; + font-weight: bold; + background: transparent; + color: #ffffff; +} +#site span { + line-height: 3em; /* really 6em relative to body, match h1 */ +} +#logo { + color: #ffb380; +} +#head { + margin: 0; + padding: 0 0 0 2em; + border-left: solid 13em #4162bf; + border-right: solid 3em #6078bf; + background: #6078bf; + color: #e6ecff; +} +#nav { + clear: both; + float: left; + overflow: hidden; + text-align: left; + line-height: 1.5; + width: 13em; + padding-top: 1em; + background: transparent; +} +#nav ul { + list-style: none outside; + margin: 0; + padding: 0; +} +#nav li { + margin: 0; + padding: 0; +} +#nav a { + display: block; + text-decoration: none; + font-weight: bold; + margin: 0; + padding: 2px 1em; + border-top: 1px solid transparent; + border-bottom: 1px solid transparent; + background: transparent; + color: #2142bf; +} +#nav a:hover, #nav a:active { + text-decoration: none; + border-top: 1px solid #97a7d7; + border-bottom: 1px solid #e6ecff; + background: #b9c9f9; + color: #ff0000; +} +#nav a.current, #nav a.current:hover, #nav a.current:active { + border-top: 1px solid #e6ecff; + border-bottom: 1px solid #97a7d7; + background: #c5d5ff; + color: #2142bf; +} +#nav ul ul a { + padding: 0 1em 0 1.7em; +} +#nav ul ul ul a { + padding: 0 0.5em 0 2.4em; +} +#main { + line-height: 1.5; + text-align: left; + margin: 0; + padding: 1em 2em; + border-left: solid 13em #bfcfff; + border-right: solid 3em #e6ecff; + background: #e6ecff; +} +#foot { + clear: both; + font-size: 80%; + text-align: center; + margin: 0; + padding: 0.5em; + background: #6078bf; + 
color: #ffffff; +} +#foot a:link, #foot a:visited { + text-decoration: underline; + background: transparent; + color: #ffffff; +} +#foot a:hover, #foot a:active { + text-decoration: underline; + background: transparent; + color: #bfcfff; +} diff --git a/lib/LuaJIT/doc/changes.html b/lib/LuaJIT/doc/changes.html new file mode 100644 index 0000000..1208ac8 --- /dev/null +++ b/lib/LuaJIT/doc/changes.html @@ -0,0 +1,882 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>LuaJIT Change History</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; } +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>LuaJIT Change History</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a class="current" 
href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +This is a list of changes between the released versions of LuaJIT.<br> +The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT 2.0.5</strong>.<br> +</p> +<p> +Please check the +<a href="http://luajit.org/changes.html"><span class="ext">»</span> Online Change History</a> +to see whether newer versions are available. +</p> + +<div class="major" style="background: #d0d0ff;"> +<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 — 2017-05-01</h2> +<ul> +<li>Rewrite memory block allocator.</li> +<li>Add various extension from Lua 5.2/5.3.</li> +<li>Remove old Lua 5.0 compatibility defines.</li> +<li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> +<li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li> +<li>Fix soft-float <tt>math.abs()</tt> and negation.</li> +<li>Fix formatting of some small denormals at low precision.</li> +<li>LJ_GC64: Add JIT compiler support.</li> +<li>x64/LJ_GC64: Add JIT compiler backend.</li> +<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li> +<li>Windows/x86: Add full exception interoperability.</li> +<li>ARM64: Add big-endian support.</li> +<li>ARM64: Add JIT compiler backend.</li> +<li>MIPS: Fix <tt>TSETR</tt> barrier.</li> +<li>MIPS: Support MIPS16 interlinking.</li> +<li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li> +<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li> +<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li> +<li>FFI: Compile bitfield loads/stores.</li> +<li>Various fixes common with the 2.0 branch.</li> +</ul> + 
+<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 — 2016-03-03</h2> +<ul> +<li>Enable trace stitching.</li> +<li>Use internal implementation for converting FP numbers to strings.</li> +<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li> +<li>Add MIPS soft-float support.</li> +<li>Switch MIPS port to dual-number mode.</li> +<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li> +<li>FFI: Add <tt>ssize_t</tt> declaration.</li> +<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li> +<li>Various minor fixes.</li> +</ul> + +<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 — 2015-08-25</h2> +<p> +This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0. +Please take a look at the commit history for more details. +</p> +<ul> +<li>Changes to the VM core: +<ul> +<li>Add low-overhead profiler (<tt>-jp</tt>).</li> +<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li> +<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li> +<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li> +<li>Parse binary number literals (<tt>0bxxx</tt>).</li> +</ul></li> +<li>Improvements to the JIT compiler: +<ul> +<li>Add trace stitching (disabled for now).</li> +<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li> +<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li> +<li>Compile <tt>BC_TSETM</tt>, e.g. 
<tt>{1,2,3,f()}</tt>.</li> +<li>Compile string concatenations (<tt>BC_CAT</tt>).</li> +<li>Compile <tt>__concat</tt> metamethod.</li> +<li>Various minor optimizations.</li> +</ul></li> +<li>Internal Changes: +<ul> +<li>Add support for embedding LuaJIT bytecode for builtins.</li> +<li>Replace various builtins with embedded bytecode.</li> +<li>Refactor string buffers and string formatting.</li> +<li>Remove obsolete non-truncating number to integer conversions.</li> +</ul></li> +<li>Ports: +<ul> +<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li> +<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li> +<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li> +<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li> +<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li> +<li>PPC/e500: Drop support for this architecture.</li> +</ul></li> +<li>FFI library: +<ul> +<li>FFI: Add 64 bit bitwise operations.</li> +<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li> +<li>FFI: Compile conversions from functions to function pointers.</li> +<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li> +<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li> +<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li> +</ul></li> +</ul> +</div> + +<div class="major" style="background: #ffffd0;"> +<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2> +<ul> +<li>Add workaround for MSVC 2015 stdio changes.</li> +<li>Limit mcode alloc probing, depending on the available pool size.</li> +<li>Fix overly restrictive range calculation in mcode allocation.</li> +<li>Fix out-of-scope goto handling in parser.</li> +<li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li> +<li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li> +<li>Fix GC step size calculation.</li> +<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li> +<li>Fix for cdata vs. 
non-cdata arithmetics/comparisons.</li> +<li>Drop leftover regs in 'for' iterator assignment, too.</li> +<li>Fix PHI remarking in SINK pass.</li> +<li>Don't try to record outermost <tt>pcall()</tt> return to lower frame.</li> +<li>Add guard for obscure aliasing between open upvalues and SSA slots.</li> +<li>Remove assumption that <tt>lj_math_random_step()</tt> doesn't clobber FPRs.</li> +<li>Fix handling of non-numeric strings in arithmetic coercions.</li> +<li>Fix recording of <tt>select(n, ...)</tt> with off-trace varargs.</li> +<li>Fix install for cross-builds.</li> +<li>Don't allocate unused 2nd result register in JIT compiler backend.</li> +<li>Drop marks from replayed instructions when sinking.</li> +<li>Fix unsinking check.</li> +<li>Properly handle OOM in <tt>trace_save()</tt>.</li> +<li>Limit number of arguments given to <tt>io.lines()</tt> and <tt>fp:lines()</tt>.</li> +<li>Fix narrowing of <tt>TOBIT</tt>.</li> +<li>OSX: Fix build with recent XCode.</li> +<li>x86/x64: Don't spill an explicit <tt>REF_BASE</tt> in the IR.</li> +<li>x86/x64: Fix instruction length decoder.</li> +<li>x86/x64: Search for exit jumps with instruction length decoder.</li> +<li>ARM: Fix <tt>BLX</tt> encoding for Thumb interworking calls.</li> +<li>MIPS: Don't use <tt>RID_GP</tt> as a scratch register.</li> +<li>MIPS: Fix emitted code for U32 to float conversion.</li> +<li>MIPS: Backport workaround for compact unwind tables.</li> +<li>MIPS: Fix cross-endian jit.bcsave.</li> +<li>MIPS: Fix <tt>BC_ISNEXT</tt> fallback path.</li> +<li>MIPS: Fix use of ffgccheck delay slots in interpreter.</li> +<li>FFI: Fix FOLD rules for <tt>int64_t</tt> comparisons.</li> +<li>FFI: Fix SPLIT pass for <tt>CONV i64.u64</tt>.</li> +<li>FFI: Fix <tt>ipairs()</tt> recording.</li> +<li>FFI: Don't propagate qualifiers into subtypes of complex.</li> +</ul> + +<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 — 2015-05-14</h2> +<ul> +<li>Fix stack check in narrowing optimization.</li> +<li>Fix Lua/C API typecheck error for 
special indexes.</li> +<li>Fix string to number conversion.</li> +<li>Fix lexer error for chunks without tokens.</li> +<li>Don't compile <tt>IR_RETF</tt> after <tt>CALLT</tt> to ff with side effects.</li> +<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization in Lua parser.</li> +<li>Fix corner case in string to number conversion.</li> +<li>Gracefully handle <tt>lua_error()</tt> for a suspended coroutine.</li> +<li>Avoid error messages when building with Clang.</li> +<li>Fix snapshot #0 handling for traces with a stack check on entry.</li> +<li>Fix fused constant loads under high register pressure.</li> +<li>Invalidate backpropagation cache after DCE.</li> +<li>Fix ABC elimination.</li> +<li>Fix debug info for main chunk of stripped bytecode.</li> +<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li> +<li>Fix FOLD rule for <tt>STRREF</tt> of <tt>SNEW</tt>.</li> +<li>Fix frame traversal while searching for error function.</li> +<li>Prevent GC estimate miscalculation due to buffer growth.</li> +<li>Prevent adding side traces for stack checks.</li> +<li>Fix top slot calculation for snapshots with continuations.</li> +<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li> +<li>Add PS Vita port.</li> +<li>Fix compatibility issues with Illumos.</li> +<li>Fix DragonFly build (unsupported).</li> +<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li> +<li>x86: Fix argument checks for <tt>ipairs()</tt> iterator.</li> +<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX Clang.</li> +<li>x86: Fix code generation for unused result of <tt>math.random()</tt>.</li> +<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and <tt>LUAJIT_USE_VALGRIND</tt>.</li> +<li>x86/x64: Fix argument check for bit shifts.</li> +<li>x86/x64: Fix code generation for fused test/arith ops.</li> +<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li> +<li>PPC: Fix red zone overflow in machine code generation.</li> +<li>PPC: Don't use 
<tt>mcrxr</tt> on PPE.</li> +<li>Various archs: Fix excess stack growth in interpreter.</li> +<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV num.u32</tt>.</li> +<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li> +<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li> +<li>FFI: Fix initialization of unions of subtypes.</li> +<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li> +<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt> metamethod resolution for ctypes.</li> +<li>FFI: Fix compilation of reference field access.</li> +<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li> +<li>FFI: Fix recording of indexing a struct pointer ctype object itself.</li> +<li>FFI: Allow non-scalar cdata to be compared for equality by address.</li> +<li>FFI: Fix pseudo type conversions for type punning.</li> +</ul> + +<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 — 2014-03-12</h2> +<ul> +<li>Add PS4 port.</li> +<li>Add support for multilib distro builds.</li> +<li>Fix OSX build.</li> +<li>Fix MinGW build.</li> +<li>Fix Xbox 360 build.</li> +<li>Improve ULOAD forwarding for open upvalues.</li> +<li>Fix GC steps threshold handling when called by JIT-compiled code.</li> +<li>Fix argument checks for <tt>math.deg()</tt> and <tt>math.rad()</tt>.</li> +<li>Fix <tt>jit.flush(func|true)</tt>.</li> +<li>Respect <tt>jit.off(func)</tt> when returning to a function, too.</li> +<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li> +<li>Fix line number for relocated bytecode after closure fixup.</li> +<li>Fix frame traversal for backtraces.</li> +<li>Fix ABC elimination.</li> +<li>Fix handling of redundant PHIs.</li> +<li>Fix snapshot restore for exit to function header.</li> +<li>Fix type punning alias analysis for constified pointers.</li> +<li>Fix call unroll checks in the presence of metamethod frames.</li> +<li>Fix initial maxslot for down-recursive traces.</li> +<li>Prevent BASE register coalescing if parent uses <tt>IR_RETF</tt>.</li> 
+<li>Don't purge modified function from stack slots in <tt>BC_RET</tt>.</li> +<li>Fix recording of <tt>BC_VARG</tt>.</li> +<li>Don't access dangling reference to reallocated IR.</li> +<li>Fix frame depth display for bytecode dump in <tt>-jdump</tt>.</li> +<li>ARM: Fix register allocation when rematerializing FPRs.</li> +<li>x64: Fix store to upvalue for lightuserdata values.</li> +<li>FFI: Add missing GC steps for callback argument conversions.</li> +<li>FFI: Properly unload loaded DLLs.</li> +<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li> +<li>FFI/x64: Fix passing of vector arguments to calls.</li> +<li>FFI: Rehash finalizer table after GC cycle, if needed.</li> +<li>FFI: Fix <tt>cts->L</tt> for cdata unsinking in snapshot restore.</li> +</ul> + +<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 — 2013-06-03</h2> +<ul> +<li>Fix memory access check for fast string interning.</li> +<li>Fix MSVC intrinsics for older versions.</li> +<li>Add missing GC steps for <tt>io.*</tt> functions.</li> +<li>Fix spurious red zone overflows in machine code generation.</li> +<li>Fix jump-range constrained mcode allocation.</li> +<li>Inhibit DSE for implicit loads via calls.</li> +<li>Fix builtin string to number conversion for overflow digits.</li> +<li>Fix optional argument handling while recording builtins.</li> +<li>Fix optional argument handling in <tt>table.concat()</tt>.</li> +<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li> +<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt> FOLD rule.</li> +<li>Fix compatibility issues with Illumos.</li> +<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li> +<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li> +<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler code.</li> +<li>FFI: Fix snapshot substitution in SPLIT pass.</li> +<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li> +<li>FFI: Fix tailcall in lowest frame to C function with bool 
result.</li> +<li>FFI: Ignore <tt>long</tt> type specifier in <tt>ffi.istype()</tt>.</li> +<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).</li> +<li>FFI: Fix calling conventions for ARM hard-float EABI (nested structs).</li> +<li>FFI: Improve error messages for arithmetic and comparison operators.</li> +<li>FFI: Insert no-op type conversion for pointer to integer cast.</li> +<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li> +<li>FFI: Must sink <tt>XBAR</tt> together with <tt>XSTORE</tt>s.</li> +<li>FFI: Preserve intermediate string for <tt>const char *</tt> conversion.</li> +</ul> + +<h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 — 2013-02-19</h2> +<ul> +<li>Don't clear frame for out-of-memory error.</li> +<li>Leave hook when resume catches error thrown from hook.</li> +<li>Add missing GC steps for template table creation.</li> +<li>Fix discharge order of comparisons in Lua parser.</li> +<li>Improve buffer handling for <tt>io.read()</tt>.</li> +<li>OSX: Add support for Mach-O object files to <tt>-b</tt> option.</li> +<li>Fix PS3 port.</li> +<li>Fix/enable Xbox 360 port.</li> +<li>x86/x64: Always mark ref for shift count as non-weak.</li> +<li>x64: Don't fuse implicitly 32-to-64 extended operands.</li> +<li>ARM: Fix armhf call argument handling.</li> +<li>ARM: Fix code generation for integer math.min/math.max.</li> +<li>PPC/e500: Fix <tt>lj_vm_floor()</tt> for Inf/NaN.</li> +<li>FFI: Change priority of table initializer variants for structs.</li> +<li>FFI: Fix code generation for bool call result check on x86/x64.</li> +<li>FFI: Load FFI library on-demand for bytecode with cdata literals.</li> +<li>FFI: Fix handling of qualified transparent structs/unions.</li> +</ul> + +<h2 id="LuaJIT-2.0.0">LuaJIT 2.0.0 — 2012-11-08</h2> +<ul> +<li>Correctness and completeness: +<ul> + <li>Fix Android/x86 build.</li> + <li>Fix recording of equality comparisons with <tt>__eq</tt> metamethods.</li> + <li>Fix detection of immutable upvalues.</li> + 
<li>Replace error with PANIC for callbacks from JIT-compiled code.</li> + <li>Fix builtin string to number conversion for <tt>INT_MIN</tt>.</li> + <li>Don't create unneeded array part for template tables.</li> + <li>Fix <tt>CONV.num.int</tt> sinking.</li> + <li>Don't propagate implicitly widened number to index metamethods.</li> + <li>ARM: Fix ordered comparisons of number vs. non-number.</li> + <li>FFI: Fix code generation for replay of sunk float fields.</li> + <li>FFI: Fix signedness of bool.</li> + <li>FFI: Fix recording of bool call result check on x86/x64.</li> + <li>FFI: Fix stack-adjustment for <tt>__thiscall</tt> callbacks.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta11">LuaJIT 2.0.0-beta11 — 2012-10-16</h2> +<ul> +<li>New features: +<ul> + <li>Use ARM VFP instructions, if available (build-time detection).</li> + <li>Add support for ARM hard-float EABI (<tt>armhf</tt>).</li> + <li>Add PS3 port.</li> + <li>Add many features from Lua 5.2, e.g. <tt>goto</tt>/labels. + Refer to <a href="extensions.html#lua52">this list</a>.</li> + <li>FFI: Add parameterized C types.</li> + <li>FFI: Add support for copy constructors.</li> + <li>FFI: Equality comparisons never raise an error (treat as unequal instead).</li> + <li>FFI: Box all accessed or returned enums.</li> + <li>FFI: Check for <tt>__new</tt> metamethod when calling a constructor.</li> + <li>FFI: Handle <tt>__pairs</tt>/<tt>__ipairs</tt> metamethods for cdata objects.</li> + <li>FFI: Convert <tt>io.*</tt> file handle to <tt>FILE *</tt> pointer (but as a <tt>void *</tt>).</li> + <li>FFI: Detect and support type punning through unions.</li> + <li>FFI: Improve various error messages.</li> +</ul></li> +<li>Build-system reorganization: +<ul> + <li>Reorganize directory layout:<br> + <tt>lib/*</tt> → <tt>src/jit/*</tt><br> + <tt>src/buildvm_*.dasc</tt> → <tt>src/vm_*.dasc</tt><br> + <tt>src/buildvm_*.h</tt> → removed<br> + <tt>src/buildvm*</tt> → <tt>src/host/*</tt></li> + <li>Add minified Lua interpreter plus 
Lua BitOp (<tt>minilua</tt>) to run DynASM.</li> + <li>Change DynASM bit operations to use Lua BitOp.</li> + <li>Translate only <tt>vm_*.dasc</tt> for detected target architecture.</li> + <li>Improve target detection for <tt>msvcbuild.bat</tt>.</li> + <li>Fix build issues on Cygwin and MinGW with optional MSys.</li> + <li>Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI mismatch.</li> + <li>Remove some library functions for no-JIT/no-FFI builds.</li> + <li>Add uninstall target to top-level Makefile.</li> +</ul></li> +<li>Correctness and completeness: +<ul> + <li>Preserve snapshot #0 PC for all traces.</li> + <li>Fix argument checks for <tt>coroutine.create()</tt>.</li> + <li>Command line prints version and JIT status to <tt>stdout</tt>, not <tt>stderr</tt>.</li> + <li>Fix userdata <tt>__gc</tt> separations at Lua state close.</li> + <li>Fix <tt>TDUP</tt> to <tt>HLOAD</tt> forwarding for <tt>LJ_DUALNUM</tt> builds.</li> + <li>Fix buffer check in bytecode writer.</li> + <li>Make <tt>os.date()</tt> thread-safe.</li> + <li>Add missing declarations for MSVC intrinsics.</li> + <li>Fix dispatch table modifications for return hooks.</li> + <li>Workaround for MSVC conversion bug (<tt>double</tt> → <tt>uint32_t</tt> → <tt>int32_t</tt>).</li> + <li>Fix FOLD rule <tt>(i-j)-i => 0-j</tt>.</li> + <li>Never use DWARF unwinder on Windows.</li> + <li>Fix shrinking of direct mapped blocks in builtin allocator.</li> + <li>Limit recursion depth in <tt>string.match()</tt> et al.</li> + <li>Fix late despecialization of <tt>ITERN</tt> after loop has been entered.</li> + <li>Fix <tt>'f'</tt> and <tt>'L'</tt> options for <tt>debug.getinfo()</tt> and <tt>lua_getinfo()</tt>.</li> + <li>Fix <tt>package.searchpath()</tt>.</li> + <li>OSX: Change dylib names to be consistent with other platforms.</li> + <li>Android: Workaround for broken <tt>sprintf("%g", -0.0)</tt>.</li> + <li>x86: Remove support for ancient CPUs without <tt>CMOV</tt> (before Pentium Pro).</li> + <li>x86: Fix 
register allocation for calls returning register pair.</li> + <li>x86/x64: Fix fusion of unsigned byte comparisons with swapped operands.</li> + <li>ARM: Fix <tt>tonumber()</tt> argument check.</li> + <li>ARM: Fix modulo operator and <tt>math.floor()</tt>/<tt>math.ceil()</tt> for <tt>inf</tt>/<tt>nan</tt>.</li> + <li>ARM: Invoke SPLIT pass for leftover <tt>IR_TOBIT</tt>.</li> + <li>ARM: Fix BASE register coalescing.</li> + <li>PPC: Fix interpreter state setup in callbacks.</li> + <li>PPC: Fix <tt>string.sub()</tt> range check.</li> + <li>MIPS: Support generation of MIPS/MIPSEL bytecode object files.</li> + <li>MIPS: Fix calls to <tt>floor()</tt>/<tt>ceil()</tt><tt>/trunc()</tt>.</li> + <li>ARM/PPC: Detect more target architecture variants.</li> + <li>ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp. <tt>tostring()</tt>.</li> + <li>ARM/PPC/MIPS: Fix rematerialization of FP constants.</li> + <li>FFI: Don't call <tt>FreeLibrary()</tt> on our own EXE/DLL.</li> + <li>FFI: Resolve metamethods for constructors, too.</li> + <li>FFI: Properly disable callbacks on iOS (would require executable memory).</li> + <li>FFI: Fix cdecl string parsing during recording.</li> + <li>FFI: Show address pointed to for <tt>tostring(ref)</tt>, too.</li> + <li>FFI: Fix alignment of C call argument/return structure.</li> + <li>FFI: Initialize all fields of standard types.</li> + <li>FFI: Fix callback handling when new C types are declared in callback.</li> + <li>FFI: Fix recording of constructors for pointers.</li> + <li>FFI: Always resolve metamethods for pointers to structs.</li> + <li>FFI: Correctly propagate alignment when interning nested types.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> + <li>Add allocation sinking and store sinking optimization.</li> + <li>Constify immutable upvalues.</li> + <li>Add builtin string to integer or FP number conversion. 
Improves cross-platform consistency and correctness.</li> + <li>Create string hash slots in template tables for non-const values, too. Avoids later table resizes.</li> + <li>Eliminate <tt>HREFK</tt> guard for template table references.</li> + <li>Add various new FOLD rules.</li> + <li>Don't use stack unwinding for <tt>lua_yield()</tt> (slow on x64).</li> + <li>ARM, PPC, MIPS: Improve <tt>XLOAD</tt> operand fusion and register hinting.</li> + <li>PPC, MIPS: Compile <tt>math.sqrt()</tt> to sqrt instruction, if available.</li> + <li>FFI: Fold <tt>KPTR</tt> + constant offset in SPLIT pass.</li> + <li>FFI: Optimize/inline <tt>ffi.copy()</tt> and <tt>ffi.fill()</tt>.</li> + <li>FFI: Compile and optimize array/struct copies.</li> + <li>FFI: Compile <tt>ffi.typeof(cdata|ctype)</tt>, <tt>ffi.sizeof()</tt>, <tt>ffi.alignof()</tt>, <tt>ffi.offsetof()</tt> and <tt>ffi.gc()</tt>.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta10">LuaJIT 2.0.0-beta10 — 2012-05-09</h2> +<ul> +<li>New features: +<ul> +<li>The MIPS port of LuaJIT is complete. It requires a CPU conforming to the +MIPS32 R1 architecture with hardware FPU. O32 hard-fp ABI, +little-endian or big-endian.</li> +<li>Auto-detect target arch via cross-compiler. No need for +<tt>TARGET=arch</tt> anymore.</li> +<li>Make DynASM compatible with Lua 5.2.</li> +<li>From Lua 5.2: Try <tt>__tostring</tt> metamethod on non-string error +messages.</li> +</ul></li> +<li>Correctness and completeness: +<ul> +<li>Fix parsing of hex literals with exponents.</li> +<li>Fix bytecode dump for certain number constants.</li> +<li>Fix argument type in error message for relative arguments.</li> +<li>Fix argument error handling on Lua stacks without a frame.</li> +<li>Add missing mcode limit check in assembler backend.</li> +<li>Fix compilation on OpenBSD.</li> +<li>Avoid recursive GC steps after GC-triggered trace exit.</li> +<li>Replace <tt><unwind.h></tt> definitions with our own.</li> +<li>Fix OSX build issues. 
Bump minimum required OSX version to 10.4.</li> +<li>Fix discharge order of comparisons in Lua parser.</li> +<li>Ensure running <tt>__gc</tt> of userdata created in <tt>__gc</tt> +at state close.</li> +<li>Limit number of userdata <tt>__gc</tt> separations at state close.</li> +<li>Fix bytecode <tt>JMP</tt> slot range when optimizing +<tt>and</tt>/<tt>or</tt> with constant LHS.</li> +<li>Fix DSE of <tt>USTORE</tt>.</li> +<li>Make <tt>lua_concat()</tt> work from C hook with partial frame.</li> +<li>Add required PHIs for implicit conversions, e.g. via <tt>XREF</tt> +forwarding.</li> +<li>Add more comparison variants to Valgrind suppressions file.</li> +<li>Disable loading bytecode with an extra header (BOM or <tt>#!</tt>).</li> +<li>Fix PHI stack slot syncing.</li> +<li>ARM: Reorder type/value tests to silence Valgrind.</li> +<li>ARM: Fix register allocation for <tt>ldrd</tt>-optimized +<tt>HREFK</tt>.</li> +<li>ARM: Fix conditional branch fixup for <tt>OBAR</tt>.</li> +<li>ARM: Invoke SPLIT pass for <tt>double</tt> args in FFI call.</li> +<li>ARM: Handle all <tt>CALL*</tt> ops with <tt>double</tt> results in +SPLIT pass.</li> +<li>ARM: Fix rejoin of <tt>POW</tt> in SPLIT pass.</li> +<li>ARM: Fix compilation of <tt>math.sinh</tt>, <tt>math.cosh</tt>, +<tt>math.tanh</tt>.</li> +<li>ARM, PPC: Avoid pointless arg clearing in <tt>BC_IFUNCF</tt>.</li> +<li>PPC: Fix resume after yield from hook.</li> +<li>PPC: Fix argument checking for <tt>rawget()</tt>.</li> +<li>PPC: Fix fusion of floating-point <tt>XLOAD</tt>/<tt>XSTORE</tt>.</li> +<li>PPC: Fix <tt>HREFK</tt> code generation for huge tables.</li> +<li>PPC: Use builtin D-Cache/I-Cache sync code.</li> +</ul></li> +<li>FFI library: +<ul> +<li>Ignore empty statements in <tt>ffi.cdef()</tt>.</li> +<li>Ignore number parsing errors while skipping definitions.</li> +<li>Don't touch frame in callbacks with tailcalls to fast functions.</li> +<li>Fix library unloading on POSIX systems.</li> +<li>Finalize cdata before userdata when 
closing the state.</li> +<li>Change <tt>ffi.load()</tt> library name resolution for Cygwin.</li> +<li>Fix resolving of function name redirects on Windows/x86.</li> +<li>Fix symbol resolving error messages on Windows.</li> +<li>Fix blacklisting of C functions calling callbacks.</li> +<li>Fix result type of pointer difference.</li> +<li>Use correct PC in FFI metamethod error message.</li> +<li>Allow <tt>'typedef _Bool int BOOL;'</tt> for the Windows API.</li> +<li>Don't record test for bool result of call, if ignored.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta9">LuaJIT 2.0.0-beta9 — 2011-12-14</h2> +<ul> +<li>New features: +<ul> +<li>PPC port of LuaJIT is complete. Default is the dual-number port +(usually faster). Single-number port selectable via <tt>src/Makefile</tt> +at build time.</li> +<li>Add FFI callback support.</li> +<li>Extend <tt>-b</tt> to generate <tt>.c</tt>, <tt>.h</tt> or <tt>.obj/.o</tt> +files with embedded bytecode.</li> +<li>Allow loading embedded bytecode with <tt>require()</tt>.</li> +<li>From Lua 5.2: Change to <tt>'\z'</tt> escape. Reject undefined escape +sequences.</li> +</ul></li> +<li>Correctness and completeness: +<ul> +<li>Fix OSX 10.7 build. 
Fix <tt>install_name</tt> and versioning on OSX.</li> +<li>Fix iOS build.</li> +<li>Install <tt>dis_arm.lua</tt>, too.</li> +<li>Mark installed shared library as executable.</li> +<li>Add debug option to <tt>msvcbuild.bat</tt> and improve error handling.</li> +<li>Fix data-flow analysis for iterators.</li> +<li>Fix forced unwinding triggered by external unwinder.</li> +<li>Record missing <tt>for</tt> loop slot loads (return to lower frame).</li> +<li>Always use ANSI variants of Windows system functions.</li> +<li>Fix GC barrier for multi-result table constructor (<tt>TSETM</tt>).</li> +<li>Fix/add various FOLD rules.</li> +<li>Add potential PHI for number conversions due to type instability.</li> +<li>Do not eliminate PHIs only referenced from other PHIs.</li> +<li>Correctly anchor implicit number to string conversions in Lua/C API.</li> +<li>Fix various stack limit checks.</li> +<li>x64: Use thread-safe exceptions for external unwinding (GCC platforms).</li> +<li>x64: Fix result type of cdata index conversions.</li> +<li>x64: Fix <tt>math.random()</tt> and <tt>bit.bswap()</tt> code generation.</li> +<li>x64: Fix <tt>lightuserdata</tt> comparisons.</li> +<li>x64: Always extend stack-passed arguments to pointer size.</li> +<li>ARM: Many fixes to code generation backend.</li> +<li>PPC/e500: Fix dispatch for binop metamethods.</li> +<li>PPC/e500: Save/restore condition registers when entering/leaving the VM.</li> +<li>PPC/e500: Fix write barrier in stores of strings to upvalues.</li> +</ul></li> +<li>FFI library: +<ul> +<li>Fix C comment parsing.</li> +<li>Fix snapshot optimization for cdata comparisons.</li> +<li>Fix recording of const/enum lookups in namespaces.</li> +<li>Fix call argument and return handling for <tt>I8/U8/I16/U16</tt> types.</li> +<li>Fix unfused loads of float fields.</li> +<li>Fix <tt>ffi.string()</tt> recording.</li> +<li>Save <tt>GetLastError()</tt> around <tt>ffi.load()</tt> and symbol +resolving, too.</li> +<li>Improve ld script detection in 
<tt>ffi.load()</tt>.</li> +<li>Record loads/stores to external variables in namespaces.</li> +<li>Compile calls to stdcall, fastcall and vararg functions.</li> +<li>Treat function ctypes like pointers in comparisons.</li> +<li>Resolve <tt>__call</tt> metamethod for pointers, too.</li> +<li>Record C function calls with bool return values.</li> +<li>Record <tt>ffi.errno()</tt>.</li> +<li>x86: Fix number to <tt>uint32_t</tt> conversion rounding.</li> +<li>x86: Fix 64 bit arithmetic in assembler backend.</li> +<li>x64: Fix struct-by-value calling conventions.</li> +<li>ARM: Ensure invocation of SPLIT pass for float conversions.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> +<li>Display trace types with <tt>-jv</tt> and <tt>-jdump</tt>.</li> +<li>Record isolated calls. But prefer recording loops over calls.</li> +<li>Specialize to prototype for non-monomorphic functions. Solves the +trace-explosion problem for closure-heavy programming styles.</li> +<li>Always generate a portable <tt>vmdef.lua</tt>. Easier for distros.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta8">LuaJIT 2.0.0-beta8 — 2011-06-23</h2> +<ul> +<li>New features: +<ul> +<li>Soft-float ARM port of LuaJIT is complete.</li> +<li>Add support for bytecode loading/saving and <tt>-b</tt> command line +option.</li> +<li>From Lua 5.2: <tt>__len</tt> metamethod for tables +(disabled by default).</li> +</ul></li> +<li>Correctness and completeness: +<ul> +<li>ARM: Misc. 
fixes for interpreter.</li> +<li>x86/x64: Fix <tt>bit.*</tt> argument checking in interpreter.</li> +<li>Catch early out-of-memory in memory allocator initialization.</li> +<li>Fix data-flow analysis for paths leading to an upvalue close.</li> +<li>Fix check for missing arguments in <tt>string.format()</tt>.</li> +<li>Fix Solaris/x86 build (note: not a supported target).</li> +<li>Fix recording of loops with unstable directions in side traces.</li> +<li>x86/x64: Fix fusion of comparisons with <tt>u8</tt>/<tt>u16</tt> +<tt>XLOAD</tt>.</li> +<li>x86/x64: Fix register allocation for variable shifts.</li> +</ul></li> +<li>FFI library: +<ul> +<li>Add <tt>ffi.errno()</tt>. Save <tt>errno</tt>/<tt>GetLastError()</tt> +around allocations etc.</li> +<li>Fix <tt>__gc</tt> for VLA/VLS cdata objects.</li> +<li>Fix recording of casts from 32 bit cdata pointers to integers.</li> +<li><tt>tonumber(cdata)</tt> returns <tt>nil</tt> for non-numbers.</li> +<li>Show address pointed to for <tt>tostring(pointer)</tt>.</li> +<li>Print <tt>NULL</tt> pointers as <tt>"cdata<... 
*>: NULL"</tt>.</li> +<li>Support <tt>__tostring</tt> metamethod for pointers to structs, too.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> +<li>More tuning for loop unrolling heuristics.</li> +<li>Flatten and compress in-memory debug info (saves ~70%).</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta7">LuaJIT 2.0.0-beta7 — 2011-05-05</h2> +<ul> +<li>New features: +<ul> +<li>ARM port of the LuaJIT interpreter is complete.</li> +<li>FFI library: Add <tt>ffi.gc()</tt>, <tt>ffi.metatype()</tt>, +<tt>ffi.istype()</tt>.</li> +<li>FFI library: Resolve ld script redirection in <tt>ffi.load()</tt>.</li> +<li>From Lua 5.2: <tt>package.searchpath()</tt>, <tt>fp:read("*L")</tt>, +<tt>load(string)</tt>.</li> +<li>From Lua 5.2, disabled by default: empty statement, +<tt>table.unpack()</tt>, modified <tt>coroutine.running()</tt>.</li> +</ul></li> +<li>Correctness and completeness: +<ul> +<li>FFI library: numerous fixes.</li> +<li>Fix type mismatches in store-to-load forwarding.</li> +<li>Fix error handling within metamethods.</li> +<li>Fix <tt>table.maxn()</tt>.</li> +<li>Improve accuracy of <tt>x^-k</tt> on x64.</li> +<li>Fix code generation for Intel Atom in x64 mode.</li> +<li>Fix narrowing of POW.</li> +<li>Fix recording of retried fast functions.</li> +<li>Fix code generation for <tt>bit.bnot()</tt> and multiplies.</li> +<li>Fix error location within cpcall frames.</li> +<li>Add workaround for old libgcc unwind bug.</li> +<li>Fix <tt>lua_yield()</tt> and <tt>getmetatable(lightuserdata)</tt> on x64.</li> +<li>Misc. fixes for PPC/e500 interpreter.</li> +<li>Fix stack slot updates for down-recursion.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> +<li>Add dual-number mode (int/double) for the VM. 
Enabled for ARM.</li> +<li>Improve narrowing of arithmetic operators and <tt>for</tt> loops.</li> +<li>Tune loop unrolling heuristics and increase trace recorder limits.</li> +<li>Eliminate dead slots in snapshots using bytecode data-flow analysis.</li> +<li>Avoid phantom stores to proxy tables.</li> +<li>Optimize lookups in empty proxy tables.</li> +<li>Improve bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta6">LuaJIT 2.0.0-beta6 — 2011-02-11</h2> +<ul> +<li>New features: +<ul> +<li>PowerPC/e500v2 port of the LuaJIT interpreter is complete.</li> +<li>Various minor features from Lua 5.2: Hex escapes in literals, +<tt>'\*'</tt> escape, reversible <tt>string.format("%q",s)</tt>, +<tt>"%g"</tt> pattern, <tt>table.sort</tt> checks callbacks, +<tt>os.exit(status|true|false[,close])</tt>.</li> +<li>Lua 5.2 <tt>__pairs</tt> and <tt>__ipairs</tt> metamethods +(disabled by default).</li> +<li>Initial release of the FFI library.</li> +</ul></li> +<li>Correctness and completeness: +<ul> +<li>Fix <tt>string.format()</tt> for non-finite numbers.</li> +<li>Fix memory leak when compiled to use the built-in allocator.</li> +<li>x86/x64: Fix unnecessary resize in <tt>TSETM</tt> bytecode.</li> +<li>Fix various GC issues with traces and <tt>jit.flush()</tt>.</li> +<li>x64: Fix fusion of indexes for array references.</li> +<li>x86/x64: Fix stack overflow handling for coroutine results.</li> +<li>Enable low-2GB memory allocation on FreeBSD/x64.</li> +<li>Fix <tt>collectgarbage("count")</tt> result if more than 2GB is in use.</li> +<li>Fix parsing of hex floats.</li> +<li>x86/x64: Fix loop branch inversion with trailing +<tt>HREF+NE/EQ</tt>.</li> +<li>Add <tt>jit.os</tt> string.</li> +<li><tt>coroutine.create()</tt> permits running C functions, too.</li> +<li>Fix OSX build to work with newer ld64 versions.</li> +<li>Fix bytecode optimization of <tt>and</tt>/<tt>or</tt> operators.</li> +</ul></li> +<li>Structural and 
performance enhancements: +<ul> +<li>Emit specialized bytecode for <tt>pairs()</tt>/<tt>next()</tt>.</li> +<li>Improve bytecode coalescing of <tt>nil</tt> constants.</li> +<li>Compile calls to vararg functions.</li> +<li>Compile <tt>select()</tt>.</li> +<li>Improve alias analysis, esp. for loads from allocations.</li> +<li>Tuning of various compiler heuristics.</li> +<li>Refactor and extend IR conversion instructions.</li> +<li>x86/x64: Various backend enhancements related to the FFI.</li> +<li>Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta5">LuaJIT 2.0.0-beta5 — 2010-08-24</h2> +<ul> +<li>Correctness and completeness: +<ul> +<li>Fix trace exit dispatch to function headers.</li> +<li>Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.</li> +<li>Reorganize and fix placement of generated machine code on x64.</li> +<li>Fix TNEW in x64 interpreter.</li> +<li>Do not eliminate PHIs for values only referenced from side exits.</li> +<li>OS-independent canonicalization of strings for non-finite numbers.</li> +<li>Fix <tt>string.char()</tt> range check on x64.</li> +<li>Fix <tt>tostring()</tt> resolving within <tt>print()</tt>.</li> +<li>Fix error handling for <tt>next()</tt>.</li> +<li>Fix passing of constant arguments to external calls on x64.</li> +<li>Fix interpreter argument check for two-argument SSE math functions.</li> +<li>Fix C frame chain corruption caused by <tt>lua_cpcall()</tt>.</li> +<li>Fix return from <tt>pcall()</tt> within active hook.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> +<li>Replace on-trace GC frame syncing with interpreter exit.</li> +<li>Improve hash lookup specialization by not removing dead keys during GC.</li> +<li>Turn traces into true GC objects.</li> +<li>Avoid starting a GC cycle immediately after library init.</li> +<li>Add weak guards to improve dead-code elimination.</li> +<li>Speed up string interning.</li> +</ul></li> +</ul> + +<h2 
id="LuaJIT-2.0.0-beta4">LuaJIT 2.0.0-beta4 — 2010-03-28</h2> +<ul> +<li>Correctness and completeness: +<ul> +<li>Fix precondition for on-trace creation of table keys.</li> +<li>Fix <tt>{f()}</tt> on x64 when table is resized.</li> +<li>Fix folding of ordered comparisons with same references.</li> +<li>Fix snapshot restores for multi-result bytecodes.</li> +<li>Fix potential hang when recording bytecode with nested closures.</li> +<li>Fix recording of <tt>getmetatable()</tt>, <tt>tonumber()</tt> and bad argument types.</li> +<li>Fix SLOAD fusion across returns to lower frames.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> +<li>Add array bounds check elimination. <tt>-Oabc</tt> is enabled by default.</li> +<li>More tuning for x64, e.g. smaller table objects.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta3">LuaJIT 2.0.0-beta3 — 2010-03-07</h2> +<ul> +<li>LuaJIT x64 port: +<ul> +<li>Port integrated memory allocator to Linux/x64, Windows/x64 and OSX/x64.</li> +<li>Port interpreter and JIT compiler to x64.</li> +<li>Port DynASM to x64.</li> +<li>Many 32/64 bit cleanups in the VM.</li> +<li>Allow building the interpreter with either x87 or SSE2 arithmetics.</li> +<li>Add external unwinding and C++ exception interop (default on x64).</li> +</ul></li> +<li>Correctness and completeness: +<ul> +<li>Fix constructor bytecode generation for certain conditional values.</li> +<li>Fix some cases of ordered string comparisons.</li> +<li>Fix <tt>lua_tocfunction()</tt>.</li> +<li>Fix cutoff register in JMP bytecode for some conditional expressions.</li> +<li>Fix PHI marking algorithm for references from variant slots.</li> +<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li> +<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li> +<li>Drive the GC forward on string allocations in the parser.</li> +<li>Implement call/return hooks (zero-cost if disabled).</li> +<li>Implement yield from C hooks.</li> +<li>Disable JIT compiler on older 
non-SSE2 CPUs instead of aborting.</li> +</ul></li> +<li>Structural and performance enhancements: +<ul> +<li>Compile recursive code (tail-, up- and down-recursion).</li> +<li>Improve heuristics for bytecode penalties and blacklisting.</li> +<li>Split CALL/FUNC recording and clean up fast function call semantics.</li> +<li>Major redesign of internal function call handling.</li> +<li>Improve FOR loop const specialization and integerness checks.</li> +<li>Switch to pre-initialized stacks. Avoid frame-clearing.</li> +<li>Colocation of prototypes and related data: bytecode, constants, debug info.</li> +<li>Cleanup parser and streamline bytecode generation.</li> +<li>Add support for weak IR references to register allocator.</li> +<li>Switch to compressed, extensible snapshots.</li> +<li>Compile returns to frames below the start frame.</li> +<li>Improve alias analysis of upvalues using a disambiguation hash value.</li> +<li>Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1 instructions.</li> +<li>Add generic C call handling to IR and backend.</li> +<li>Improve KNUM fuse vs. load heuristics.</li> +<li>Compile various <tt>io.*()</tt> functions.</li> +<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt> +and <tt>math.random()</tt>.</li> +</ul></li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 — 2009-11-09</h2> +<ul> +<li>Reorganize build system. 
Build static+shared library on POSIX.</li> +<li>Allow C++ exception conversion on all platforms +using a wrapper function.</li> +<li>Automatically catch C++ exceptions and rethrow Lua error +(DWARF2 only).</li> +<li>Check for the correct x87 FPU precision at strategic points.</li> +<li>Always use wrappers for libm functions.</li> +<li>Resurrect metamethod name strings before copying them.</li> +<li>Mark current trace, even if compiler is idle.</li> +<li>Ensure FILE metatable is created only once.</li> +<li>Fix type comparisons when different integer types are involved.</li> +<li>Fix <tt>getmetatable()</tt> recording.</li> +<li>Fix TDUP with dead keys in template table.</li> +<li><tt>jit.flush(tr)</tt> returns status. +Prevent manual flush of a trace that's still linked.</li> +<li>Improve register allocation heuristics for invariant references.</li> +<li>Compile the push/pop variants of <tt>table.insert()</tt> and +<tt>table.remove()</tt>.</li> +<li>Compatibility with MSVC <tt>link /debug</tt>.</li> +<li>Fix <tt>lua_iscfunction()</tt>.</li> +<li>Fix <tt>math.random()</tt> when compiled with <tt>-fpic</tt> (OSX).</li> +<li>Fix <tt>table.maxn()</tt>.</li> +<li>Bump <tt>MACOSX_DEPLOYMENT_TARGET</tt> to <tt>10.4</tt></li> +<li><tt>luaL_check*()</tt> and <tt>luaL_opt*()</tt> now support +negative arguments, too.<br> +This matches the behavior of Lua 5.1, but not the specification.</li> +</ul> + +<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 — 2009-10-31</h2> +<ul> +<li>This is the first public release of LuaJIT 2.0.</li> +<li>The whole VM has been rewritten from the ground up, so there's +no point in listing differences over earlier versions.</li> +</ul> +</div> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/contact.html b/lib/LuaJIT/doc/contact.html new file mode 100644 index 0000000..2aace4b --- /dev/null 
+++ b/lib/LuaJIT/doc/contact.html @@ -0,0 +1,110 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Contact</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Contact</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +If you want to report bugs, propose fixes or suggest enhancements, +please use the +<a href="https://github.com/LuaJIT/LuaJIT/issues">GitHub 
issue tracker</a>. +</p> +<p> +Please send general questions to the +<a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a>. +</p> +<p> +You can also send any questions you have directly to me: +</p> + +<script type="text/javascript"> +<!-- +var xS="@-:\" .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<abc>defghijklmnopqrstuvwxyz";function xD(s) +{var len=s.length;var r="";for(var i=0;i<len;i++) +{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)c=xS.charAt(69-n);r+=c;} +document.write("<"+"p>"+r+"<"+"/p>\n");} +//--> +</script> +<script type="text/javascript"> +<!-- +xD("fyZKB8xv\"FJytmz8.KAB0u52D") +//--></script> +<noscript> +<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13"> +</p> +</noscript> + +<h2>Copyright</h2> +<p> +All documentation is +Copyright © 2005-2018 Mike Pall. +</p> + + +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_c_api.html b/lib/LuaJIT/doc/ext_c_api.html new file mode 100644 index 0000000..f27b956 --- /dev/null +++ b/lib/LuaJIT/doc/ext_c_api.html @@ -0,0 +1,188 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Lua/C API Extensions</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Lua/C API Extensions</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span 
class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a class="current" href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT adds some extensions to the standard Lua/C API. The LuaJIT include +directory must be in the compiler search path (<tt>-I<i>path</i></tt>) +to be able to include the required header for C code: +</p> +<pre class="code"> +#include "luajit.h" +</pre> +<p> +Or for C++ code: +</p> +<pre class="code"> +#include "lua.hpp" +</pre> + +<h2 id="luaJIT_setmode"><tt>luaJIT_setmode(L, idx, mode)</tt> +— Control VM</h2> +<p> +This is a C API extension to allow control of the VM from C code. The +full prototype of <tt>luaJIT_setmode</tt> is: +</p> +<pre class="code"> +LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); +</pre> +<p> +The returned status is either success (<tt>1</tt>) or failure (<tt>0</tt>). +The second argument is either <tt>0</tt> or a stack index (similar to the +other Lua/C API functions). +</p> +<p> +The third argument specifies the mode, which is 'or'ed with a flag. 
+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off, +<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or +<tt>LUAJIT_MODE_FLUSH</tt> to flush cached code. +</p> +<p> +The following modes are defined: +</p> + +<h3 id="mode_engine"><tt>luaJIT_setmode(L, 0, LUAJIT_MODE_ENGINE|flag)</tt></h3> +<p> +Turn the whole JIT compiler on or off or flush the whole cache of compiled code. +</p> + +<h3 id="mode_func"><tt>luaJIT_setmode(L, idx, LUAJIT_MODE_FUNC|flag)</tt><br> +<tt>luaJIT_setmode(L, idx, LUAJIT_MODE_ALLFUNC|flag)</tt><br> +<tt>luaJIT_setmode(L, idx, LUAJIT_MODE_ALLSUBFUNC|flag)</tt></h3> +<p> +This sets the mode for the function at the stack index <tt>idx</tt> or +the parent of the calling function (<tt>idx = 0</tt>). It either +enables JIT compilation for a function, disables it and flushes any +already compiled code or only flushes already compiled code. This +applies recursively to all sub-functions of the function with +<tt>LUAJIT_MODE_ALLFUNC</tt> or only to the sub-functions with +<tt>LUAJIT_MODE_ALLSUBFUNC</tt>. +</p> + +<h3 id="mode_trace"><tt>luaJIT_setmode(L, trace,<br> + LUAJIT_MODE_TRACE|LUAJIT_MODE_FLUSH)</tt></h3> +<p> +Flushes the specified root trace and all of its side traces from the cache. +The code for the trace will be retained as long as there are any other +traces which link to it. +</p> + +<h3 id="mode_wrapcfunc"><tt>luaJIT_setmode(L, idx, LUAJIT_MODE_WRAPCFUNC|flag)</tt></h3> +<p> +This mode defines a wrapper function for calls to C functions. If +called with <tt>LUAJIT_MODE_ON</tt>, the stack index at <tt>idx</tt> +must be a <tt>lightuserdata</tt> object holding a pointer to the wrapper +function. From now on all C functions are called through the wrapper +function. If called with <tt>LUAJIT_MODE_OFF</tt> this mode is turned +off and all C functions are directly called. +</p> +<p> +The wrapper function can be used for debugging purposes or to catch +and convert foreign exceptions. 
But please read the section on +<a href="extensions.html#exceptions">C++ exception interoperability</a> +first. Recommended usage can be seen in this C++ code excerpt: +</p> +<pre class="code"> +#include <exception> +#include "lua.hpp" + +// Catch C++ exceptions and convert them to Lua error messages. +// Customize as needed for your own exception classes. +static int wrap_exceptions(lua_State *L, lua_CFunction f) +{ + try { + return f(L); // Call wrapped function and return result. + } catch (const char *s) { // Catch and convert exceptions. + lua_pushstring(L, s); + } catch (std::exception& e) { + lua_pushstring(L, e.what()); + } catch (...) { + lua_pushliteral(L, "caught (...)"); + } + return lua_error(L); // Rethrow as a Lua error. +} + +static int myinit(lua_State *L) +{ + ... + // Define wrapper function and enable it. + lua_pushlightuserdata(L, (void *)wrap_exceptions); + luaJIT_setmode(L, -1, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_ON); + lua_pop(L, 1); + ... +} +</pre> +<p> +Note that you can only define <b>a single global wrapper function</b>, +so be careful when using this mechanism from multiple C++ modules. +Also note that this mechanism is not without overhead. 
+</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_ffi.html b/lib/LuaJIT/doc/ext_ffi.html new file mode 100644 index 0000000..de57bcf --- /dev/null +++ b/lib/LuaJIT/doc/ext_ffi.html @@ -0,0 +1,331 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>FFI Library</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>FFI Library</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a class="current" href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a 
href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> + +The FFI library allows <b>calling external C functions</b> and +<b>using C data structures</b> from pure Lua code. + +</p> +<p> + +The FFI library largely obviates the need to write tedious manual +Lua/C bindings in C. No need to learn a separate binding language +— <b>it parses plain C declarations!</b> These can be +cut-n-pasted from C header files or reference manuals. It's up to +the task of binding large libraries without the need for dealing with +fragile binding generators. + +</p> +<p> +The FFI library is tightly integrated into LuaJIT (it's not available +as a separate module). The code generated by the JIT-compiler for +accesses to C data structures from Lua code is on par with the +code a C compiler would generate. Calls to C functions can +be inlined in JIT-compiled code, unlike calls to functions bound via +the classic Lua/C API. +</p> +<p> +This page gives a short introduction to the usage of the FFI library. +<em>Please use the FFI sub-topics in the navigation bar to learn more.</em> +</p> + +<h2 id="call">Motivating Example: Calling External C Functions</h2> +<p> +It's really easy to call an external C library function: +</p> +<pre class="code mark"> +<span class="codemark">① +② + + +③</span>local ffi = require("ffi") +ffi.cdef[[ +<span style="color:#00a000;">int printf(const char *fmt, ...);</span> +]] +ffi.C.printf("Hello %s!", "world") +</pre> +<p> +So, let's pick that apart: +</p> +<p> +<span class="mark">①</span> Load the FFI library. +</p> +<p> +<span class="mark">②</span> Add a C declaration +for the function. The part inside the double-brackets (in green) is +just standard C syntax. +</p> +<p> +<span class="mark">③</span> Call the named +C function — Yes, it's that simple! 
+</p> +<p style="font-size: 8pt;"> +Actually, what goes on behind the scenes is far from simple: <span +style="color:#4040c0;">③</span> makes use of the standard +C library namespace <tt>ffi.C</tt>. Indexing this namespace with +a symbol name (<tt>"printf"</tt>) automatically binds it to the +standard C library. The result is a special kind of object which, +when called, runs the <tt>printf</tt> function. The arguments passed +to this function are automatically converted from Lua objects to the +corresponding C types. +</p> +<p> +Ok, so maybe the use of <tt>printf()</tt> wasn't such a spectacular +example. You could have done that with <tt>io.write()</tt> and +<tt>string.format()</tt>, too. But you get the idea ... +</p> +<p> +So here's something to pop up a message box on Windows: +</p> +<pre class="code"> +local ffi = require("ffi") +ffi.cdef[[ +<span style="color:#00a000;">int MessageBoxA(void *w, const char *txt, const char *cap, int type);</span> +]] +ffi.C.MessageBoxA(nil, "Hello world!", "Test", 0) +</pre> +<p> +Bing! Again, that was far too easy, no? +</p> +<p style="font-size: 8pt;"> +Compare this with the effort required to bind that function using the +classic Lua/C API: create an extra C file, add a C function +that retrieves and checks the argument types passed from Lua and calls +the actual C function, add a list of module functions and their +names, add a <tt>luaopen_*</tt> function and register all module +functions, compile and link it into a shared library (DLL), move it to +the proper path, add Lua code that loads the module aaaand ... finally +call the binding function. Phew! +</p> + +<h2 id="cdata">Motivating Example: Using C Data Structures</h2> +<p> +The FFI library allows you to create and access C data +structures. Of course the main use for this is for interfacing with +C functions. But they can be used stand-alone, too. +</p> +<p> +Lua is built upon high-level data types. They are flexible, extensible +and dynamic. 
That's why we all love Lua so much. Alas, this can be +inefficient for certain tasks, where you'd really want a low-level +data type. E.g. a large array of a fixed structure needs to be +implemented with a big table holding lots of tiny tables. This imposes +both a substantial memory overhead as well as a performance overhead. +</p> +<p> +Here's a sketch of a library that operates on color images plus a +simple benchmark. First, the plain Lua version: +</p> +<pre class="code"> +local floor = math.floor + +local function image_ramp_green(n) + local img = {} + local f = 255/(n-1) + for i=1,n do + img[i] = { red = 0, green = floor((i-1)*f), blue = 0, alpha = 255 } + end + return img +end + +local function image_to_grey(img, n) + for i=1,n do + local y = floor(0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue) + img[i].red = y; img[i].green = y; img[i].blue = y + end +end + +local N = 400*400 +local img = image_ramp_green(N) +for i=1,1000 do + image_to_grey(img, N) +end +</pre> +<p> +This creates a table with 160.000 pixels, each of which is a table +holding four number values in the range of 0-255. First an image with +a green ramp is created (1D for simplicity), then the image is +converted to greyscale 1000 times. Yes, that's silly, but I was in +need of a simple example ... +</p> +<p> +And here's the FFI version. 
The modified parts have been marked in +bold: +</p> +<pre class="code mark"> +<span class="codemark">① + + + + + +② + +③ +④ + + + + + + +③ +⑤</span><b>local ffi = require("ffi") +ffi.cdef[[ +</b><span style="color:#00a000;">typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;</span><b> +]]</b> + +local function image_ramp_green(n) + <b>local img = ffi.new("rgba_pixel[?]", n)</b> + local f = 255/(n-1) + for i=<b>0,n-1</b> do + <b>img[i].green = i*f</b> + <b>img[i].alpha = 255</b> + end + return img +end + +local function image_to_grey(img, n) + for i=<b>0,n-1</b> do + local y = <b>0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue</b> + img[i].red = y; img[i].green = y; img[i].blue = y + end +end + +local N = 400*400 +local img = image_ramp_green(N) +for i=1,1000 do + image_to_grey(img, N) +end +</pre> +<p> +Ok, so that wasn't too difficult: +</p> +<p> +<span class="mark">①</span> First, load the FFI +library and declare the low-level data type. Here we choose a +<tt>struct</tt> which holds four byte fields, one for each component +of a 4x8 bit RGBA pixel. +</p> +<p> +<span class="mark">②</span> Creating the data +structure with <tt>ffi.new()</tt> is straightforward — the +<tt>'?'</tt> is a placeholder for the number of elements of a +variable-length array. +</p> +<p> +<span class="mark">③</span> C arrays are +zero-based, so the indexes have to run from <tt>0</tt> to +<tt>n-1</tt>. One might want to allocate one more element instead to +simplify converting legacy code. +</p> +<p> +<span class="mark">④</span> Since <tt>ffi.new()</tt> +zero-fills the array by default, we only need to set the green and the +alpha fields. +</p> +<p> +<span class="mark">⑤</span> The calls to +<tt>math.floor()</tt> can be omitted here, because floating-point +numbers are already truncated towards zero when converting them to an +integer. This happens implicitly when the number is stored in the +fields of each pixel. 
+</p> +<p> +Now let's have a look at the impact of the changes: first, memory +consumption for the image is down from 22 Megabytes to +640 Kilobytes (400*400*4 bytes). That's a factor of 35x less! So, +yes, tables do have a noticeable overhead. BTW: The original program +would consume 40 Megabytes in plain Lua (on x64). +</p> +<p> +Next, performance: the pure Lua version runs in 9.57 seconds (52.9 +seconds with the Lua interpreter) and the FFI version runs in 0.48 +seconds on my machine (YMMV). That's a factor of 20x faster (110x +faster than the Lua interpreter). +</p> +<p style="font-size: 8pt;"> +The avid reader may notice that converting the pure Lua version over +to use array indexes for the colors (<tt>[1]</tt> instead of +<tt>.red</tt>, <tt>[2]</tt> instead of <tt>.green</tt> etc.) ought to +be more compact and faster. This is certainly true (by a factor of +~1.7x). Switching to a struct-of-arrays would help, too. +</p> +<p style="font-size: 8pt;"> +However the resulting code would be less idiomatic and rather +error-prone. And it still doesn't get even close to the performance of +the FFI version of the code. Also, high-level data structures cannot +be easily passed to other C functions, especially I/O functions, +without undue conversion penalties. 
+</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_ffi_api.html b/lib/LuaJIT/doc/ext_ffi_api.html new file mode 100644 index 0000000..54ff0ce --- /dev/null +++ b/lib/LuaJIT/doc/ext_ffi_api.html @@ -0,0 +1,571 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>ffi.* API Functions</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +table.abitable { width: 30em; line-height: 1.2; } +tr.abihead td { font-weight: bold; } +td.abiparam { font-weight: bold; width: 6em; } +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1><tt>ffi.*</tt> API Functions</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a class="current" href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> 
+<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +This page describes the API functions provided by the FFI library in +detail. It's recommended to read through the +<a href="ext_ffi.html">introduction</a> and the +<a href="ext_ffi_tutorial.html">FFI tutorial</a> first. +</p> + +<h2 id="glossary">Glossary</h2> +<ul> +<li><b>cdecl</b> — An abstract C type declaration (a Lua +string).</li> +<li><b>ctype</b> — A C type object. This is a special kind of +<b>cdata</b> returned by <tt>ffi.typeof()</tt>. It serves as a +<b>cdata</b> <a href="#ffi_new">constructor</a> when called.</li> +<li><b>cdata</b> — A C data object. It holds a value of the +corresponding <b>ctype</b>.</li> +<li><b>ct</b> — A C type specification which can be used for +most of the API functions. Either a <b>cdecl</b>, a <b>ctype</b> or a +<b>cdata</b> serving as a template type.</li> +<li><b>cb</b> — A callback object. This is a C data object +holding a special function pointer. Calling this function from +C code runs an associated Lua function.</li> +<li><b>VLA</b> — A variable-length array is declared with a +<tt>?</tt> instead of the number of elements, e.g. <tt>"int[?]"</tt>. +The number of elements (<tt>nelem</tt>) must be given when it's +<a href="#ffi_new">created</a>.</li> +<li><b>VLS</b> — A variable-length struct is a <tt>struct</tt> C +type where the last element is a <b>VLA</b>. 
The same rules for +declaration and creation apply.</li> +</ul> + +<h2 id="decl">Declaring and Accessing External Symbols</h2> +<p> +External symbols must be declared first and can then be accessed by +indexing a <a href="ext_ffi_semantics.html#clib">C library +namespace</a>, which automatically binds the symbol to a specific +library. +</p> + +<h3 id="ffi_cdef"><tt>ffi.cdef(def)</tt></h3> +<p> +Adds multiple C declarations for types or external symbols (named +variables or functions). <tt>def</tt> must be a Lua string. It's +recommended to use the syntactic sugar for string arguments as +follows: +</p> +<pre class="code"> +ffi.cdef[[ +<span style="color:#00a000;">typedef struct foo { int a, b; } foo_t; // Declare a struct and typedef. +int dofoo(foo_t *f, int n); /* Declare an external C function. */</span> +]] +</pre> +<p> +The contents of the string (the part in green above) must be a +sequence of +<a href="ext_ffi_semantics.html#clang">C declarations</a>, +separated by semicolons. The trailing semicolon for a single +declaration may be omitted. +</p> +<p> +Please note that external symbols are only <em>declared</em>, but they +are <em>not bound</em> to any specific address, yet. Binding is +achieved with C library namespaces (see below). +</p> +<p style="color: #c00000;"> +C declarations are not passed through a C pre-processor, +yet. No pre-processor tokens are allowed, except for +<tt>#pragma pack</tt>. Replace <tt>#define</tt> in existing +C header files with <tt>enum</tt>, <tt>static const</tt> +or <tt>typedef</tt> and/or pass the files through an external +C pre-processor (once). Be careful not to include unneeded or +redundant declarations from unrelated header files. +</p> + +<h3 id="ffi_C"><tt>ffi.C</tt></h3> +<p> +This is the default C library namespace — note the +uppercase <tt>'C'</tt>. It binds to the default set of symbols or +libraries on the target system. 
These are more or less the same as a +C compiler would offer by default, without specifying extra link +libraries. +</p> +<p> +On POSIX systems, this binds to symbols in the default or global +namespace. This includes all exported symbols from the executable and +any libraries loaded into the global namespace. This includes at least +<tt>libc</tt>, <tt>libm</tt>, <tt>libdl</tt> (on Linux), +<tt>libgcc</tt> (if compiled with GCC), as well as any exported +symbols from the Lua/C API provided by LuaJIT itself. +</p> +<p> +On Windows systems, this binds to symbols exported from the +<tt>*.exe</tt>, the <tt>lua51.dll</tt> (i.e. the Lua/C API +provided by LuaJIT itself), the C runtime library LuaJIT was linked +with (<tt>msvcrt*.dll</tt>), <tt>kernel32.dll</tt>, +<tt>user32.dll</tt> and <tt>gdi32.dll</tt>. +</p> + +<h3 id="ffi_load"><tt>clib = ffi.load(name [,global])</tt></h3> +<p> +This loads the dynamic library given by <tt>name</tt> and returns +a new C library namespace which binds to its symbols. On POSIX +systems, if <tt>global</tt> is <tt>true</tt>, the library symbols are +loaded into the global namespace, too. +</p> +<p> +If <tt>name</tt> is a path, the library is loaded from this path. +Otherwise <tt>name</tt> is canonicalized in a system-dependent way and +searched in the default search path for dynamic libraries: +</p> +<p> +On POSIX systems, if the name contains no dot, the extension +<tt>.so</tt> is appended. Also, the <tt>lib</tt> prefix is prepended +if necessary. So <tt>ffi.load("z")</tt> looks for <tt>"libz.so"</tt> +in the default shared library search path. +</p> +<p> +On Windows systems, if the name contains no dot, the extension +<tt>.dll</tt> is appended. So <tt>ffi.load("ws2_32")</tt> looks for +<tt>"ws2_32.dll"</tt> in the default DLL search path. +</p> + +<h2 id="create">Creating cdata Objects</h2> +<p> +The following API functions create cdata objects (<tt>type()</tt> +returns <tt>"cdata"</tt>). 
All created cdata objects are +<a href="ext_ffi_semantics.html#gc">garbage collected</a>. +</p> + +<h3 id="ffi_new"><tt>cdata = ffi.new(ct [,nelem] [,init...])<br> +cdata = <em>ctype</em>([nelem,] [init...])</tt></h3> +<p> +Creates a cdata object for the given <tt>ct</tt>. VLA/VLS types +require the <tt>nelem</tt> argument. The second syntax uses a ctype as +a constructor and is otherwise fully equivalent. +</p> +<p> +The cdata object is initialized according to the +<a href="ext_ffi_semantics.html#init">rules for initializers</a>, +using the optional <tt>init</tt> arguments. Excess initializers cause +an error. +</p> +<p> +Performance notice: if you want to create many objects of one kind, +parse the cdecl only once and get its ctype with +<tt>ffi.typeof()</tt>. Then use the ctype as a constructor repeatedly. +</p> +<p style="font-size: 8pt;"> +Please note that an anonymous <tt>struct</tt> declaration implicitly +creates a new and distinguished ctype every time you use it for +<tt>ffi.new()</tt>. This is probably <b>not</b> what you want, +especially if you create more than one cdata object. Different anonymous +<tt>structs</tt> are not considered assignment-compatible by the +C standard, even though they may have the same fields! Also, they +are considered different types by the JIT-compiler, which may cause an +excessive number of traces. It's strongly suggested to either declare +a named <tt>struct</tt> or <tt>typedef</tt> with <tt>ffi.cdef()</tt> +or to create a single ctype object for an anonymous <tt>struct</tt> +with <tt>ffi.typeof()</tt>. +</p> + +<h3 id="ffi_typeof"><tt>ctype = ffi.typeof(ct)</tt></h3> +<p> +Creates a ctype object for the given <tt>ct</tt>. +</p> +<p> +This function is especially useful to parse a cdecl only once and then +use the resulting ctype object as a <a href="#ffi_new">constructor</a>. +</p> + +<h3 id="ffi_cast"><tt>cdata = ffi.cast(ct, init)</tt></h3> +<p> +Creates a scalar cdata object for the given <tt>ct</tt>. 
The cdata +object is initialized with <tt>init</tt> using the "cast" variant of +the <a href="ext_ffi_semantics.html#convert">C type conversion +rules</a>. +</p> +<p> +This function is mainly useful to override the pointer compatibility +checks or to convert pointers to addresses or vice versa. +</p> + +<h3 id="ffi_metatype"><tt>ctype = ffi.metatype(ct, metatable)</tt></h3> +<p> +Creates a ctype object for the given <tt>ct</tt> and associates it with +a metatable. Only <tt>struct</tt>/<tt>union</tt> types, complex numbers +and vectors are allowed. Other types may be wrapped in a +<tt>struct</tt>, if needed. +</p> +<p> +The association with a metatable is permanent and cannot be changed +afterwards. Neither the contents of the <tt>metatable</tt> nor the +contents of an <tt>__index</tt> table (if any) may be modified +afterwards. The associated metatable automatically applies to all uses +of this type, no matter how the objects are created or where they +originate from. Note that pre-defined operations on types have +precedence (e.g. declared field names cannot be overridden). +</p> +<p> +All standard Lua metamethods are implemented. These are called directly, +without shortcuts and on any mix of types. For binary operations, the +left operand is checked first for a valid ctype metamethod. The +<tt>__gc</tt> metamethod only applies to <tt>struct</tt>/<tt>union</tt> +types and performs an implicit <a href="#ffi_gc"><tt>ffi.gc()</tt></a> +call during creation of an instance. +</p> + +<h3 id="ffi_gc"><tt>cdata = ffi.gc(cdata, finalizer)</tt></h3> +<p> +Associates a finalizer with a pointer or aggregate cdata object. The +cdata object is returned unchanged. +</p> +<p> +This function allows safe integration of unmanaged resources into the +automatic memory management of the LuaJIT garbage collector. Typical +usage: +</p> +<pre class="code"> +local p = ffi.gc(ffi.C.malloc(n), ffi.C.free) +... +p = nil -- Last reference to p is gone. 
+-- GC will eventually run finalizer: ffi.C.free(p) +</pre> +<p> +A cdata finalizer works like the <tt>__gc</tt> metamethod for userdata +objects: when the last reference to a cdata object is gone, the +associated finalizer is called with the cdata object as an argument. The +finalizer can be a Lua function or a cdata function or cdata function +pointer. An existing finalizer can be removed by setting a <tt>nil</tt> +finalizer, e.g. right before explicitly deleting a resource: +</p> +<pre class="code"> +ffi.C.free(ffi.gc(p, nil)) -- Manually free the memory. +</pre> + +<h2 id="info">C Type Information</h2> +<p> +The following API functions return information about C types. +They are most useful for inspecting cdata objects. +</p> + +<h3 id="ffi_sizeof"><tt>size = ffi.sizeof(ct [,nelem])</tt></h3> +<p> +Returns the size of <tt>ct</tt> in bytes. Returns <tt>nil</tt> if +the size is not known (e.g. for <tt>"void"</tt> or function types). +Requires <tt>nelem</tt> for VLA/VLS types, except for cdata objects. +</p> + +<h3 id="ffi_alignof"><tt>align = ffi.alignof(ct)</tt></h3> +<p> +Returns the minimum required alignment for <tt>ct</tt> in bytes. +</p> + +<h3 id="ffi_offsetof"><tt>ofs [,bpos,bsize] = ffi.offsetof(ct, field)</tt></h3> +<p> +Returns the offset (in bytes) of <tt>field</tt> relative to the start +of <tt>ct</tt>, which must be a <tt>struct</tt>. Additionally returns +the position and the field size (in bits) for bit fields. +</p> + +<h3 id="ffi_istype"><tt>status = ffi.istype(ct, obj)</tt></h3> +<p> +Returns <tt>true</tt> if <tt>obj</tt> has the C type given by +<tt>ct</tt>. Returns <tt>false</tt> otherwise. +</p> +<p> +C type qualifiers (<tt>const</tt> etc.) are ignored. Pointers are +checked with the standard pointer compatibility rules, but without any +special treatment for <tt>void *</tt>. If <tt>ct</tt> specifies a +<tt>struct</tt>/<tt>union</tt>, then a pointer to this type is accepted, +too. Otherwise the types must match exactly. 
+</p> +<p> +Note: this function accepts all kinds of Lua objects for the +<tt>obj</tt> argument, but always returns <tt>false</tt> for non-cdata +objects. +</p> + +<h2 id="util">Utility Functions</h2> + +<h3 id="ffi_errno"><tt>err = ffi.errno([newerr])</tt></h3> +<p> +Returns the error number set by the last C function call which +indicated an error condition. If the optional <tt>newerr</tt> argument +is present, the error number is set to the new value and the previous +value is returned. +</p> +<p> +This function offers a portable and OS-independent way to get and set the +error number. Note that only <em>some</em> C functions set the error +number. And it's only significant if the function actually indicated an +error condition (e.g. with a return value of <tt>-1</tt> or +<tt>NULL</tt>). Otherwise, it may or may not contain any previously set +value. +</p> +<p> +You're advised to call this function only when needed and as close as +possible after the return of the related C function. The +<tt>errno</tt> value is preserved across hooks, memory allocations, +invocations of the JIT compiler and other internal VM activity. The same +applies to the value returned by <tt>GetLastError()</tt> on Windows, but +you need to declare and call it yourself. +</p> + +<h3 id="ffi_string"><tt>str = ffi.string(ptr [,len])</tt></h3> +<p> +Creates an interned Lua string from the data pointed to by +<tt>ptr</tt>. +</p> +<p> +If the optional argument <tt>len</tt> is missing, <tt>ptr</tt> is +converted to a <tt>"char *"</tt> and the data is assumed to be +zero-terminated. The length of the string is computed with +<tt>strlen()</tt>. +</p> +<p> +Otherwise <tt>ptr</tt> is converted to a <tt>"void *"</tt> and +<tt>len</tt> gives the length of the data. The data may contain +embedded zeros and need not be byte-oriented (though this may cause +endianness issues). 
+</p> +<p> +This function is mainly useful to convert (temporary) +<tt>"const char *"</tt> pointers returned by +C functions to Lua strings and store them or pass them to other +functions expecting a Lua string. The Lua string is an (interned) copy +of the data and bears no relation to the original data area anymore. +Lua strings are 8 bit clean and may be used to hold arbitrary, +non-character data. +</p> +<p> +Performance notice: it's faster to pass the length of the string, if +it's known. E.g. when the length is returned by a C call like +<tt>sprintf()</tt>. +</p> + +<h3 id="ffi_copy"><tt>ffi.copy(dst, src, len)<br> +ffi.copy(dst, str)</tt></h3> +<p> +Copies the data pointed to by <tt>src</tt> to <tt>dst</tt>. +<tt>dst</tt> is converted to a <tt>"void *"</tt> and <tt>src</tt> +is converted to a <tt>"const void *"</tt>. +</p> +<p> +In the first syntax, <tt>len</tt> gives the number of bytes to copy. +Caveat: if <tt>src</tt> is a Lua string, then <tt>len</tt> must not +exceed <tt>#src+1</tt>. +</p> +<p> +In the second syntax, the source of the copy must be a Lua string. All +bytes of the string <em>plus a zero-terminator</em> are copied to +<tt>dst</tt> (i.e. <tt>#src+1</tt> bytes). +</p> +<p> +Performance notice: <tt>ffi.copy()</tt> may be used as a faster +(inlinable) replacement for the C library functions +<tt>memcpy()</tt>, <tt>strcpy()</tt> and <tt>strncpy()</tt>. +</p> + +<h3 id="ffi_fill"><tt>ffi.fill(dst, len [,c])</tt></h3> +<p> +Fills the data pointed to by <tt>dst</tt> with <tt>len</tt> constant +bytes, given by <tt>c</tt>. If <tt>c</tt> is omitted, the data is +zero-filled. +</p> +<p> +Performance notice: <tt>ffi.fill()</tt> may be used as a faster +(inlinable) replacement for the C library function +<tt>memset(dst, c, len)</tt>. Please note the different +order of arguments! 
+</p> + +<h2 id="target">Target-specific Information</h2> + +<h3 id="ffi_abi"><tt>status = ffi.abi(param)</tt></h3> +<p> +Returns <tt>true</tt> if <tt>param</tt> (a Lua string) applies for the +target ABI (Application Binary Interface). Returns <tt>false</tt> +otherwise. The following parameters are currently defined: +</p> +<table class="abitable"> +<tr class="abihead"> +<td class="abiparam">Parameter</td> +<td class="abidesc">Description</td> +</tr> +<tr class="odd separate"> +<td class="abiparam">32bit</td><td class="abidesc">32 bit architecture</td></tr> +<tr class="even"> +<td class="abiparam">64bit</td><td class="abidesc">64 bit architecture</td></tr> +<tr class="odd separate"> +<td class="abiparam">le</td><td class="abidesc">Little-endian architecture</td></tr> +<tr class="even"> +<td class="abiparam">be</td><td class="abidesc">Big-endian architecture</td></tr> +<tr class="odd separate"> +<td class="abiparam">fpu</td><td class="abidesc">Target has a hardware FPU</td></tr> +<tr class="even"> +<td class="abiparam">softfp</td><td class="abidesc">softfp calling conventions</td></tr> +<tr class="odd"> +<td class="abiparam">hardfp</td><td class="abidesc">hardfp calling conventions</td></tr> +<tr class="even separate"> +<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> +<tr class="odd"> +<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> +<tr class="even"> +<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr> +<tr class="odd"> +<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr> +</table> + +<h3 id="ffi_os"><tt>ffi.os</tt></h3> +<p> +Contains the target OS name. Same contents as +<a href="ext_jit.html#jit_os"><tt>jit.os</tt></a>. +</p> + +<h3 id="ffi_arch"><tt>ffi.arch</tt></h3> +<p> +Contains the target architecture name. Same contents as +<a href="ext_jit.html#jit_arch"><tt>jit.arch</tt></a>. 
+</p> + +<h2 id="callback">Methods for Callbacks</h2> +<p> +The C types for <a href="ext_ffi_semantics.html#callback">callbacks</a> +have some extra methods: +</p> + +<h3 id="callback_free"><tt>cb:free()</tt></h3> +<p> +Free the resources associated with a callback. The associated Lua +function is unanchored and may be garbage collected. The callback +function pointer is no longer valid and must not be called anymore +(it may be reused by a subsequently created callback). +</p> + +<h3 id="callback_set"><tt>cb:set(func)</tt></h3> +<p> +Associate a new Lua function with a callback. The C type of the +callback and the callback function pointer are unchanged. +</p> +<p> +This method is useful to dynamically switch the receiver of callbacks +without creating a new callback each time and registering it again (e.g. +with a GUI library). +</p> + +<h2 id="extended">Extended Standard Library Functions</h2> +<p> +The following standard library functions have been extended to work +with cdata objects: +</p> + +<h3 id="tonumber"><tt>n = tonumber(cdata)</tt></h3> +<p> +Converts a number cdata object to a <tt>double</tt> and returns it as +a Lua number. This is particularly useful for boxed 64 bit +integer values. Caveat: this conversion may incur a precision loss. +</p> + +<h3 id="tostring"><tt>s = tostring(cdata)</tt></h3> +<p> +Returns a string representation of the value of 64 bit integers +(<tt><b>"</b>nnn<b>LL"</b></tt> or <tt><b>"</b>nnn<b>ULL"</b></tt>) or +complex numbers (<tt><b>"</b>re±im<b>i"</b></tt>). Otherwise +returns a string representation of the C type of a ctype object +(<tt><b>"ctype&lt;</b>type<b>&gt;"</b></tt>) or a cdata object +(<tt><b>"cdata&lt;</b>type<b>&gt;: </b>address"</tt>), unless you +override it with a <tt>__tostring</tt> metamethod (see +<a href="#ffi_metatype"><tt>ffi.metatype()</tt></a>). 
+</p> + +<h3 id="pairs"><tt>iter, obj, start = pairs(cdata)<br> +iter, obj, start = ipairs(cdata)<br></tt></h3> +<p> +Calls the <tt>__pairs</tt> or <tt>__ipairs</tt> metamethod of the +corresponding ctype. +</p> + +<h2 id="literals">Extensions to the Lua Parser</h2> +<p> +The parser for Lua source code treats numeric literals with the +suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64 bit +integers. Case doesn't matter, but uppercase is recommended for +readability. It handles decimal (<tt>42LL</tt>), hexadecimal +(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals. +</p> +<p> +The imaginary part of complex numbers can be specified by suffixing +number literals with <tt>i</tt> or <tt>I</tt>, e.g. <tt>12.5i</tt>. +Caveat: you'll need to use <tt>1i</tt> to get an imaginary part with +the value one, since <tt>i</tt> itself still refers to a variable +named <tt>i</tt>. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_ffi_semantics.html b/lib/LuaJIT/doc/ext_ffi_semantics.html new file mode 100644 index 0000000..4b03da9 --- /dev/null +++ b/lib/LuaJIT/doc/ext_ffi_semantics.html @@ -0,0 +1,1261 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>FFI Semantics</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +table.convtable { line-height: 1.2; } +tr.convhead td { font-weight: bold; } +td.convop { font-style: italic; width: 40%; } +</style> +</head> +<body> +<div id="site"> +<a 
href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>FFI Semantics</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +This page describes the detailed semantics underlying the FFI library +and its interaction with both Lua and C code. +</p> +<p> +Given that the FFI library is designed to interface with C code +and that declarations can be written in plain C syntax, <b>it +closely follows the C language semantics</b>, wherever possible. +Some minor concessions are needed for smoother interoperation with Lua +language semantics. +</p> +<p> +Please don't be overwhelmed by the contents of this page — this +is a reference and you may need to consult it, if in doubt. It doesn't +hurt to skim this page, but most of the semantics "just work" as you'd +expect them to work. 
It should be straightforward to write +applications using the LuaJIT FFI for developers with a C or C++ +background. +</p> + +<h2 id="clang">C Language Support</h2> +<p> +The FFI library has a built-in C parser with a minimal memory +footprint. It's used by the <a href="ext_ffi_api.html">ffi.* library +functions</a> to declare C types or external symbols. +</p> +<p> +Its only purpose is to parse C declarations, as found e.g. in +C header files. Although it does evaluate constant expressions, +it's <em>not</em> a C compiler. The body of <tt>inline</tt> +C function definitions is simply ignored. +</p> +<p> +Also, this is <em>not</em> a validating C parser. It expects and +accepts correctly formed C declarations, but it may choose to +ignore bad declarations or show rather generic error messages. If in +doubt, please check the input against your favorite C compiler. +</p> +<p> +The C parser complies with the <b>C99 language standard</b> plus +the following extensions: +</p> +<ul> + +<li>The <tt>'\e'</tt> escape in character and string literals.</li> + +<li>The C99/C++ boolean type, declared with the keywords <tt>bool</tt> +or <tt>_Bool</tt>.</li> + +<li>Complex numbers, declared with the keywords <tt>complex</tt> or +<tt>_Complex</tt>.</li> + +<li>Two complex number types: <tt>complex</tt> (aka +<tt>complex double</tt>) and <tt>complex float</tt>.</li> + +<li>Vector types, declared with the GCC <tt>mode</tt> or +<tt>vector_size</tt> attribute.</li> + +<li>Unnamed ('transparent') <tt>struct</tt>/<tt>union</tt> fields +inside a <tt>struct</tt>/<tt>union</tt>.</li> + +<li>Incomplete <tt>enum</tt> declarations, handled like incomplete +<tt>struct</tt> declarations.</li> + +<li>Unnamed <tt>enum</tt> fields inside a +<tt>struct</tt>/<tt>union</tt>. 
This is similar to a scoped C++ +<tt>enum</tt>, except that declared constants are visible in the +global namespace, too.</li> + +<li>Scoped <tt>static const</tt> declarations inside a +<tt>struct</tt>/<tt>union</tt> (from C++).</li> + +<li>Zero-length arrays (<tt>[0]</tt>), empty +<tt>struct</tt>/<tt>union</tt>, variable-length arrays (VLA, +<tt>[?]</tt>) and variable-length structs (VLS, with a trailing +VLA).</li> + +<li>C++ reference types (<tt>int &amp;x</tt>).</li> + +<li>Alternate GCC keywords with '<tt>__</tt>', e.g. +<tt>__const__</tt>.</li> + +<li>GCC <tt>__attribute__</tt> with the following attributes: +<tt>aligned</tt>, <tt>packed</tt>, <tt>mode</tt>, +<tt>vector_size</tt>, <tt>cdecl</tt>, <tt>fastcall</tt>, +<tt>stdcall</tt>, <tt>thiscall</tt>.</li> + +<li>The GCC <tt>__extension__</tt> keyword and the GCC +<tt>__alignof__</tt> operator.</li> + +<li>GCC <tt>__asm__("symname")</tt> symbol name redirection for +function declarations.</li> + +<li>MSVC keywords for fixed-length types: <tt>__int8</tt>, +<tt>__int16</tt>, <tt>__int32</tt> and <tt>__int64</tt>.</li> + +<li>MSVC <tt>__cdecl</tt>, <tt>__fastcall</tt>, <tt>__stdcall</tt>, +<tt>__thiscall</tt>, <tt>__ptr32</tt>, <tt>__ptr64</tt>, +<tt>__declspec(align(n))</tt> and <tt>#pragma pack</tt>.</li> + +<li>All other GCC/MSVC-specific attributes are ignored.</li> + +</ul> +<p> +The following C types are pre-defined by the C parser (like +a <tt>typedef</tt>, except re-declarations will be ignored): +</p> +<ul> + +<li>Vararg handling: <tt>va_list</tt>, <tt>__builtin_va_list</tt>, +<tt>__gnuc_va_list</tt>.</li> + +<li>From <tt>&lt;stddef.h&gt;</tt>: <tt>ptrdiff_t</tt>, +<tt>size_t</tt>, <tt>wchar_t</tt>.</li> + +<li>From <tt>&lt;stdint.h&gt;</tt>: <tt>int8_t</tt>, <tt>int16_t</tt>, +<tt>int32_t</tt>, <tt>int64_t</tt>, <tt>uint8_t</tt>, +<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, +<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> + +<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li> + +</ul> +<p> +You're 
encouraged to use these types in preference to +compiler-specific extensions or target-dependent standard types. +E.g. <tt>char</tt> differs in signedness and <tt>long</tt> differs in +size, depending on the target architecture and platform ABI. +</p> +<p> +The following C features are <b>not</b> supported: +</p> +<ul> + +<li>A declaration must always have a type specifier; it doesn't +default to an <tt>int</tt> type.</li> + +<li>Old-style empty function declarations (K&R) are not allowed. +All C functions must have a proper prototype declaration. A +function declared without parameters (<tt>int foo();</tt>) is +treated as a function taking zero arguments, like in C++.</li> + +<li>The <tt>long double</tt> C type is parsed correctly, but +there's no support for the related conversions, accesses or arithmetic +operations.</li> + +<li>Wide character strings and character literals are not +supported.</li> + +<li><a href="#status">See below</a> for features that are currently +not implemented.</li> + +</ul> + +<h2 id="convert">C Type Conversion Rules</h2> + +<h3 id="convert_tolua">Conversions from C types to Lua objects</h3> +<p> +These conversion rules apply for <em>read accesses</em> to +C types: indexing pointers, arrays or +<tt>struct</tt>/<tt>union</tt> types; reading external variables or +constant values; retrieving return values from C calls: +</p> +<table class="convtable"> +<tr class="convhead"> +<td class="convin">Input</td> +<td class="convop">Conversion</td> +<td class="convout">Output</td> +</tr> +<tr class="odd separate"> +<td class="convin"><tt>int8_t</tt>, <tt>int16_t</tt></td><td class="convop">→<sup>sign-ext</sup> <tt>int32_t</tt> → <tt>double</tt></td><td class="convout">number</td></tr> +<tr class="even"> +<td class="convin"><tt>uint8_t</tt>, <tt>uint16_t</tt></td><td class="convop">→<sup>zero-ext</sup> <tt>int32_t</tt> → <tt>double</tt></td><td class="convout">number</td></tr> +<tr class="odd"> +<td class="convin"><tt>int32_t</tt>, 
<tt>uint32_t</tt></td><td class="convop">→ <tt>double</tt></td><td class="convout">number</td></tr> +<tr class="even"> +<td class="convin"><tt>int64_t</tt>, <tt>uint64_t</tt></td><td class="convop">boxed value</td><td class="convout">64 bit int cdata</td></tr> +<tr class="odd separate"> +<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→ <tt>double</tt></td><td class="convout">number</td></tr> +<tr class="even separate"> +<td class="convin"><tt>bool</tt></td><td class="convop">0 → <tt>false</tt>, otherwise <tt>true</tt></td><td class="convout">boolean</td></tr> +<tr class="odd separate"> +<td class="convin"><tt>enum</tt></td><td class="convop">boxed value</td><td class="convout">enum cdata</td></tr> +<tr class="even"> +<td class="convin">Complex number</td><td class="convop">boxed value</td><td class="convout">complex cdata</td></tr> +<tr class="odd"> +<td class="convin">Vector</td><td class="convop">boxed value</td><td class="convout">vector cdata</td></tr> +<tr class="even"> +<td class="convin">Pointer</td><td class="convop">boxed value</td><td class="convout">pointer cdata</td></tr> +<tr class="odd separate"> +<td class="convin">Array</td><td class="convop">boxed reference</td><td class="convout">reference cdata</td></tr> +<tr class="even"> +<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">boxed reference</td><td class="convout">reference cdata</td></tr> +</table> +<p> +Bitfields are treated like their underlying type. +</p> +<p> +Reference types are dereferenced <em>before</em> a conversion can take +place — the conversion is applied to the C type pointed to +by the reference. 
+</p> + +<h3 id="convert_fromlua">Conversions from Lua objects to C types</h3> +<p> +These conversion rules apply for <em>write accesses</em> to +C types: indexing pointers, arrays or +<tt>struct</tt>/<tt>union</tt> types; initializing cdata objects; +casts to C types; writing to external variables; passing +arguments to C calls: +</p> +<table class="convtable"> +<tr class="convhead"> +<td class="convin">Input</td> +<td class="convop">Conversion</td> +<td class="convout">Output</td> +</tr> +<tr class="odd separate"> +<td class="convin">number</td><td class="convop">→</td><td class="convout"><tt>double</tt></td></tr> +<tr class="even"> +<td class="convin">boolean</td><td class="convop"><tt>false</tt> → 0, <tt>true</tt> → 1</td><td class="convout"><tt>bool</tt></td></tr> +<tr class="odd separate"> +<td class="convin">nil</td><td class="convop"><tt>NULL</tt> →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="even"> +<td class="convin">lightuserdata</td><td class="convop">lightuserdata address →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="odd"> +<td class="convin">userdata</td><td class="convop">userdata payload →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="even"> +<td class="convin">io.* file</td><td class="convop">get FILE * handle →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="odd separate"> +<td class="convin">string</td><td class="convop">match against <tt>enum</tt> constant</td><td class="convout"><tt>enum</tt></td></tr> +<tr class="even"> +<td class="convin">string</td><td class="convop">copy string data + zero-byte</td><td class="convout"><tt>int8_t[]</tt>, <tt>uint8_t[]</tt></td></tr> +<tr class="odd"> +<td class="convin">string</td><td class="convop">string data →</td><td class="convout"><tt>const char[]</tt></td></tr> +<tr class="even separate"> +<td class="convin">function</td><td class="convop"><a href="#callback">create callback</a> →</td><td class="convout">C function 
type</td></tr> +<tr class="odd separate"> +<td class="convin">table</td><td class="convop"><a href="#init_table">table initializer</a></td><td class="convout">Array</td></tr> +<tr class="even"> +<td class="convin">table</td><td class="convop"><a href="#init_table">table initializer</a></td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr> +<tr class="odd separate"> +<td class="convin">cdata</td><td class="convop">cdata payload →</td><td class="convout">C type</td></tr> +</table> +<p> +If the result type of this conversion doesn't match the +C type of the destination, the +<a href="#convert_between">conversion rules between C types</a> +are applied. +</p> +<p> +Reference types are immutable after initialization ("no re-seating of +references"). For initialization purposes or when passing values to +reference parameters, they are treated like pointers. Note that unlike +in C++, there's no way to implement automatic reference generation of +variables under the Lua language semantics. If you want to call a +function with a reference parameter, you need to explicitly pass a +one-element array. +</p> + +<h3 id="convert_between">Conversions between C types</h3> +<p> +These conversion rules are more or less the same as the standard +C conversion rules. 
Some rules only apply to casts, or require +pointer or type compatibility: +</p> +<table class="convtable"> +<tr class="convhead"> +<td class="convin">Input</td> +<td class="convop">Conversion</td> +<td class="convout">Output</td> +</tr> +<tr class="odd separate"> +<td class="convin">Signed integer</td><td class="convop">→<sup>narrow or sign-extend</sup></td><td class="convout">Integer</td></tr> +<tr class="even"> +<td class="convin">Unsigned integer</td><td class="convop">→<sup>narrow or zero-extend</sup></td><td class="convout">Integer</td></tr> +<tr class="odd"> +<td class="convin">Integer</td><td class="convop">→<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr> +<tr class="even"> +<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup> <tt>int32_t</tt> →<sup>narrow</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt></td></tr> +<tr class="odd"> +<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>trunc</sup></td><td class="convout"><tt>(u)int32_t</tt>, <tt>(u)int64_t</tt></td></tr> +<tr class="even"> +<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">→<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr> +<tr class="odd separate"> +<td class="convin">Number</td><td class="convop">n == 0 → 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr> +<tr class="even"> +<td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> → 0, <tt>true</tt> → 1</td><td class="convout">Number</td></tr> +<tr class="odd separate"> +<td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr> +<tr class="even"> +<td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr> +<tr class="odd"> +<td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td 
class="convout">Complex number</td></tr> +<tr class="even separate"> +<td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr> +<tr class="odd"> +<td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr> +<tr class="even separate"> +<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> +<tr class="odd"> +<td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> +<tr class="even"> +<td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr> +<tr class="odd separate"> +<td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr> +<tr class="even"> +<td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr> +<tr class="odd"> +<td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr> +<tr class="even"> +<td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr> +<tr class="odd separate"> +<td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr> +<tr class="even"> +<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr> +</table> +<p> +Bitfields or <tt>enum</tt> types are treated like their underlying +type. +</p> +<p> +Conversions not listed above will raise an error. E.g. it's not +possible to convert a pointer to a complex number or vice versa. 
+</p> + +<h3 id="convert_vararg">Conversions for vararg C function arguments</h3> +<p> +The following default conversion rules apply when passing Lua objects +to the variable argument part of vararg C functions: +</p> +<table class="convtable"> +<tr class="convhead"> +<td class="convin">Input</td> +<td class="convop">Conversion</td> +<td class="convout">Output</td> +</tr> +<tr class="odd separate"> +<td class="convin">number</td><td class="convop">→</td><td class="convout"><tt>double</tt></td></tr> +<tr class="even"> +<td class="convin">boolean</td><td class="convop"><tt>false</tt> → 0, <tt>true</tt> → 1</td><td class="convout"><tt>bool</tt></td></tr> +<tr class="odd separate"> +<td class="convin">nil</td><td class="convop"><tt>NULL</tt> →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="even"> +<td class="convin">userdata</td><td class="convop">userdata payload →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="odd"> +<td class="convin">lightuserdata</td><td class="convop">lightuserdata address →</td><td class="convout"><tt>(void *)</tt></td></tr> +<tr class="even separate"> +<td class="convin">string</td><td class="convop">string data →</td><td class="convout"><tt>const char *</tt></td></tr> +<tr class="odd separate"> +<td class="convin"><tt>float</tt> cdata</td><td class="convop">→</td><td class="convout"><tt>double</tt></td></tr> +<tr class="even"> +<td class="convin">Array cdata</td><td class="convop">take base address</td><td class="convout">Element pointer</td></tr> +<tr class="odd"> +<td class="convin"><tt>struct</tt>/<tt>union</tt> cdata</td><td class="convop">take base address</td><td class="convout"><tt>struct</tt>/<tt>union</tt> pointer</td></tr> +<tr class="even"> +<td class="convin">Function cdata</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr> +<tr class="odd"> +<td class="convin">Any other cdata</td><td class="convop">no conversion</td><td class="convout">C type</td></tr> 
+</table> +<p> +To pass a Lua object, other than a cdata object, as a specific type, +you need to override the conversion rules: create a temporary cdata +object with a constructor or a cast and initialize it with the value +to pass: +</p> +<p> +Assuming <tt>x</tt> is a Lua number, here's how to pass it as an +integer to a vararg function: +</p> +<pre class="code"> +ffi.cdef[[ +int printf(const char *fmt, ...); +]] +ffi.C.printf("integer value: %d\n", ffi.new("int", x)) +</pre> +<p> +If you don't do this, the default Lua number → <tt>double</tt> +conversion rule applies. A vararg C function expecting an integer +will see a garbled or uninitialized value. +</p> + +<h2 id="init">Initializers</h2> +<p> +Creating a cdata object with +<a href="ext_ffi_api.html#ffi_new"><tt>ffi.new()</tt></a> or the +equivalent constructor syntax always initializes its contents, too. +Different rules apply, depending on the number of optional +initializers and the C types involved: +</p> +<ul> +<li>If no initializers are given, the object is filled with zero bytes.</li> + +<li>Scalar types (numbers and pointers) accept a single initializer. +The Lua object is <a href="#convert_fromlua">converted to the scalar +C type</a>.</li> + +<li>Valarrays (complex numbers and vectors) are treated like scalars +when a single initializer is given. Otherwise they are treated like +regular arrays.</li> + +<li>Aggregate types (arrays and structs) accept either a single cdata +initializer of the same type (copy constructor), a single +<a href="#init_table">table initializer</a>, or a flat list of +initializers.</li> + +<li>The elements of an array are initialized, starting at index zero. +If a single initializer is given for an array, it's repeated for all +remaining elements. This doesn't happen if two or more initializers +are given: all remaining uninitialized elements are filled with zero +bytes.</li> + +<li>Byte arrays may also be initialized with a Lua string. 
This copies +the whole string plus a terminating zero-byte. The copy stops early only +if the array has a known, fixed size.</li> + +<li>The fields of a <tt>struct</tt> are initialized in the order of +their declaration. Uninitialized fields are filled with zero +bytes.</li> + +<li>Only the first field of a <tt>union</tt> can be initialized with a +flat initializer.</li> + +<li>Elements or fields which are aggregates themselves are initialized +with a <em>single</em> initializer, but this may be a table +initializer or a compatible aggregate.</li> + +<li>Excess initializers cause an error.</li> + +</ul> + +<h2 id="init_table">Table Initializers</h2> +<p> +The following rules apply if a Lua table is used to initialize an +Array or a <tt>struct</tt>/<tt>union</tt>: +</p> +<ul> + +<li>If the table index <tt>[0]</tt> is non-<tt>nil</tt>, then the +table is assumed to be zero-based. Otherwise it's assumed to be +one-based.</li> + +<li>Array elements, starting at index zero, are initialized one-by-one +with the consecutive table elements, starting at either index +<tt>[0]</tt> or <tt>[1]</tt>. This process stops at the first +<tt>nil</tt> table element.</li> + +<li>If exactly one array element was initialized, it's repeated for +all the remaining elements. Otherwise all remaining uninitialized +elements are filled with zero bytes.</li> + +<li>The above logic only applies to arrays with a known fixed size. +A VLA is only initialized with the element(s) given in the table. +Depending on the use case, you may need to explicitly add a +<tt>NULL</tt> or <tt>0</tt> terminator to a VLA.</li> + +<li>A <tt>struct</tt>/<tt>union</tt> can be initialized in the +order of the declaration of its fields. Each field is initialized with +consecutive table elements, starting at either index <tt>[0]</tt> +or <tt>[1]</tt>. 
This process stops at the first <tt>nil</tt> table +element.</li> + +<li>Otherwise, if neither index <tt>[0]</tt> nor <tt>[1]</tt> is present, +a <tt>struct</tt>/<tt>union</tt> is initialized by looking up each field +name (as a string key) in the table. Each non-<tt>nil</tt> value is +used to initialize the corresponding field.</li> + +<li>Uninitialized fields of a <tt>struct</tt> are filled with zero +bytes, except for the trailing VLA of a VLS.</li> + +<li>Initialization of a <tt>union</tt> stops after one field has been +initialized. If no field has been initialized, the <tt>union</tt> is +filled with zero bytes.</li> + +<li>Elements or fields which are aggregates themselves are initialized +with a <em>single</em> initializer, but this may be a nested table +initializer (or a compatible aggregate).</li> + +<li>Excess initializers for an array cause an error. Excess +initializers for a <tt>struct</tt>/<tt>union</tt> are ignored. +Unrelated table entries are ignored, too.</li> + +</ul> +<p> +Example: +</p> +<pre class="code"> +local ffi = require("ffi") + +ffi.cdef[[ +struct foo { int a, b; }; +union bar { int i; double d; }; +struct nested { int x; struct foo y; }; +]] + +ffi.new("int[3]", {}) --> 0, 0, 0 +ffi.new("int[3]", {1}) --> 1, 1, 1 +ffi.new("int[3]", {1,2}) --> 1, 2, 0 +ffi.new("int[3]", {1,2,3}) --> 1, 2, 3 +ffi.new("int[3]", {[0]=1}) --> 1, 1, 1 +ffi.new("int[3]", {[0]=1,2}) --> 1, 2, 0 +ffi.new("int[3]", {[0]=1,2,3}) --> 1, 2, 3 +ffi.new("int[3]", {[0]=1,2,3,4}) --> error: too many initializers + +ffi.new("struct foo", {}) --> a = 0, b = 0 +ffi.new("struct foo", {1}) --> a = 1, b = 0 +ffi.new("struct foo", {1,2}) --> a = 1, b = 2 +ffi.new("struct foo", {[0]=1,2}) --> a = 1, b = 2 +ffi.new("struct foo", {b=2}) --> a = 0, b = 2 +ffi.new("struct foo", {a=1,b=2,c=3}) --> a = 1, b = 2 'c' is ignored + +ffi.new("union bar", {}) --> i = 0, d = 0.0 +ffi.new("union bar", {1}) --> i = 1, d = ? +ffi.new("union bar", {[0]=1,2}) --> i = 1, d = ? 
'2' is ignored +ffi.new("union bar", {d=2}) --> i = ?, d = 2.0 + +ffi.new("struct nested", {1,{2,3}}) --> x = 1, y.a = 2, y.b = 3 +ffi.new("struct nested", {x=1,y={2,3}}) --> x = 1, y.a = 2, y.b = 3 +</pre> + +<h2 id="cdata_ops">Operations on cdata Objects</h2> +<p> +All of the standard Lua operators can be applied to cdata objects or a +mix of a cdata object and another Lua object. The following list shows +the pre-defined operations. +</p> +<p> +Reference types are dereferenced <em>before</em> performing each of +the operations below — the operation is applied to the +C type pointed to by the reference. +</p> +<p> +The pre-defined operations are always tried first before deferring to a +metamethod or index table (if any) for the corresponding ctype (except +for <tt>__new</tt>). An error is raised if the metamethod lookup or +index table lookup fails. +</p> + +<h3 id="cdata_array">Indexing a cdata object</h3> +<ul> + +<li><b>Indexing a pointer/array</b>: a cdata pointer/array can be +indexed by a cdata number or a Lua number. The element address is +computed as the base address plus the number value multiplied by the +element size in bytes. A read access loads the element value and +<a href="#convert_tolua">converts it to a Lua object</a>. A write +access <a href="#convert_fromlua">converts a Lua object to the element +type</a> and stores the converted value to the element. An error is +raised if the element size is undefined or a write access to a +constant element is attempted.</li> + +<li><b>Dereferencing a <tt>struct</tt>/<tt>union</tt> field</b>: a +cdata <tt>struct</tt>/<tt>union</tt> or a pointer to a +<tt>struct</tt>/<tt>union</tt> can be dereferenced by a string key, +giving the field name. The field address is computed as the base +address plus the relative offset of the field. A read access loads the +field value and <a href="#convert_tolua">converts it to a Lua +object</a>. 
A write access <a href="#convert_fromlua">converts a Lua +object to the field type</a> and stores the converted value to the +field. An error is raised if a write access to a constant +<tt>struct</tt>/<tt>union</tt> or a constant field is attempted. +Scoped enum constants or static constants are treated like a constant +field.</li> + +<li><b>Indexing a complex number</b>: a complex number can be indexed +either by a cdata number or a Lua number with the values 0 or 1, or by +the strings <tt>"re"</tt> or <tt>"im"</tt>. A read access loads the +real part (<tt>[0]</tt>, <tt>.re</tt>) or the imaginary part +(<tt>[1]</tt>, <tt>.im</tt>) of a complex number and +<a href="#convert_tolua">converts it to a Lua number</a>. The +sub-parts of a complex number are immutable — assigning to an +index of a complex number raises an error. Accessing out-of-bound +indexes returns unspecified results, but is guaranteed not to trigger +memory access violations.</li> + +<li><b>Indexing a vector</b>: a vector is treated like an array for +indexing purposes, except the vector elements are immutable — +assigning to an index of a vector raises an error.</li> + +</ul> +<p> +A ctype object can be indexed with a string key, too. The only +pre-defined operation is reading scoped constants of +<tt>struct</tt>/<tt>union</tt> types. All other accesses defer +to the corresponding metamethods or index tables (if any). +</p> +<p> +Note: since there's (deliberately) no address-of operator, a cdata +object holding a value type is effectively immutable after +initialization. The JIT compiler benefits from this fact when applying +certain optimizations. +</p> +<p> +As a consequence, the <em>elements</em> of complex numbers and +vectors are immutable. But the elements of an aggregate holding these +types <em>may</em> be modified of course. I.e. you cannot assign to +<tt>foo.c.im</tt>, but you can assign a (newly created) complex number +to <tt>foo.c</tt>. 
+</p> +<p> +The JIT compiler implements strict aliasing rules: accesses to different +types do <b>not</b> alias, except for differences in signedness (this +applies even to <tt>char</tt> pointers, unlike C99). Type punning +through unions is explicitly detected and allowed. +</p> + +<h3 id="cdata_call">Calling a cdata object</h3> +<ul> + +<li><b>Constructor</b>: a ctype object can be called and used as a +<a href="ext_ffi_api.html#ffi_new">constructor</a>. This is equivalent +to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod is +defined. The <tt>__new</tt> metamethod is called with the ctype object +plus any other arguments passed to the constructor. Note that you have to +use <tt>ffi.new</tt> inside of it, since calling <tt>ct(...)</tt> would +cause infinite recursion.</li> + +<li><b>C function call</b>: a cdata function or cdata function +pointer can be called. The passed arguments are +<a href="#convert_fromlua">converted to the C types</a> of the +parameters given by the function declaration. Arguments passed to the +variable argument part of a vararg C function use +<a href="#convert_vararg">special conversion rules</a>. This +C function is called and the return value (if any) is +<a href="#convert_tolua">converted to a Lua object</a>.<br> +On Windows/x86 systems, <tt>__stdcall</tt> functions are automatically +detected and a function declared as <tt>__cdecl</tt> (the default) is +silently fixed up after the first call.</li> + +</ul> + +<h3 id="cdata_arith">Arithmetic on cdata objects</h3> +<ul> + +<li><b>Pointer arithmetic</b>: a cdata pointer/array and a cdata +number or a Lua number can be added or subtracted. The number must be +on the right hand side for a subtraction. The result is a pointer of +the same type with an address plus or minus the number value +multiplied by the element size in bytes. 
An error is raised if the +element size is undefined.</li> + +<li><b>Pointer difference</b>: two compatible cdata pointers/arrays +can be subtracted. The result is the difference between their +addresses, divided by the element size in bytes. An error is raised if +the element size is undefined or zero.</li> + +<li><b>64 bit integer arithmetic</b>: the standard arithmetic +operators (<tt>+ - * / % ^</tt> and unary +minus) can be applied to two cdata numbers, or a cdata number and a +Lua number. If one of them is an <tt>uint64_t</tt>, the other side is +converted to an <tt>uint64_t</tt> and an unsigned arithmetic operation +is performed. Otherwise both sides are converted to an +<tt>int64_t</tt> and a signed arithmetic operation is performed. The +result is a boxed 64 bit cdata object.<br> + +If one of the operands is an <tt>enum</tt> and the other operand is a +string, the string is converted to the value of a matching <tt>enum</tt> +constant before the above conversion.<br> + +These rules ensure that 64 bit integers are "sticky". Any +expression involving at least one 64 bit integer operand results +in another one. The undefined cases for the division, modulo and power +operators return <tt>2LL ^ 63</tt> or +<tt>2ULL ^ 63</tt>.<br> + +You'll have to explicitly convert a 64 bit integer to a Lua +number (e.g. for regular floating-point calculations) with +<tt>tonumber()</tt>. 
But note this may incur a precision loss.</li> + +<li><b>64 bit bitwise operations</b>: the rules for 64 bit +arithmetic operators apply analogously.<br> + +Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt> +converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and +returns a Lua number.<br> + +For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the +conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to +<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br> + +For all other operations, only the first argument is used to determine +the output type. This implies that a cdata number as a shift count for +shifts and rotates is accepted, but that alone does <em>not</em> cause +a cdata number output.</li> + +</ul> + +<h3 id="cdata_comp">Comparisons of cdata objects</h3> +<ul> + +<li><b>Pointer comparison</b>: two compatible cdata pointers/arrays +can be compared. The result is the same as an unsigned comparison of +their addresses. <tt>nil</tt> is treated like a <tt>NULL</tt> pointer, +which is compatible with any other pointer type.</li> + +<li><b>64 bit integer comparison</b>: two cdata numbers, or a +cdata number and a Lua number can be compared with each other. If one +of them is an <tt>uint64_t</tt>, the other side is converted to an +<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise +both sides are converted to an <tt>int64_t</tt> and a signed +comparison is performed.<br> + +If one of the operands is an <tt>enum</tt> and the other operand is a +string, the string is converted to the value of a matching <tt>enum</tt> +constant before the above conversion.</li> + +<li><b>Comparisons for equality/inequality</b> never raise an error. +Even incompatible pointers can be compared for equality by address. 
Any +other incompatible comparison (also with non-cdata objects) treats the +two sides as unequal.</li> + +</ul> + +<h3 id="cdata_key">cdata objects as table keys</h3> +<p> +Lua tables may be indexed by cdata objects, but this doesn't provide +any useful semantics — <b>cdata objects are unsuitable as table +keys!</b> +</p> +<p> +A cdata object is treated like any other garbage-collected object and +is hashed and compared by its address for table indexing. Since +there's no interning for cdata value types, the same value may be +boxed in different cdata objects with different addresses. Thus +<tt>t[1LL+1LL]</tt> and <tt>t[2LL]</tt> usually <b>do not</b> point to +the same hash slot and they certainly <b>do not</b> point to the same +hash slot as <tt>t[2]</tt>. +</p> +<p> +It would seriously drive up implementation complexity and slow down +the common case, if one were to add extra handling for by-value +hashing and comparisons to Lua tables. Given the ubiquity of their use +inside the VM, this is not acceptable. +</p> +<p> +There are three viable alternatives, if you really need to use cdata +objects as keys: +</p> +<ul> + +<li>If you can get by with the precision of Lua numbers +(52 bits), then use <tt>tonumber()</tt> on a cdata number or +combine multiple fields of a cdata aggregate to a Lua number. Then use +the resulting Lua number as a key when indexing tables.<br> +One obvious benefit: <tt>t[tonumber(2LL)]</tt> <b>does</b> point to +the same slot as <tt>t[2]</tt>.</li> + +<li>Otherwise use either <tt>tostring()</tt> on 64 bit integers +or complex numbers or combine multiple fields of a cdata aggregate to +a Lua string (e.g. with +<a href="ext_ffi_api.html#ffi_string"><tt>ffi.string()</tt></a>). Then +use the resulting Lua string as a key when indexing tables.</li> + +<li>Create your own specialized hash table implementation using the +C types provided by the FFI library, just like you would in +C code. 
Ultimately this may give much better performance than the +other alternatives or what a generic by-value hash table could +possibly provide.</li> + +</ul> + +<h2 id="param">Parameterized Types</h2> +<p> +To facilitate some abstractions, the two functions +<a href="ext_ffi_api.html#ffi_typeof"><tt>ffi.typeof</tt></a> and +<a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> support +parameterized types in C declarations. Note: none of the other API +functions taking a cdecl allow this. +</p> +<p> +Any place you can write a <b><tt>typedef</tt> name</b>, an +<b>identifier</b> or a <b>number</b> in a declaration, you can write +<tt>$</tt> (the dollar sign) instead. These placeholders are replaced in +order of appearance with the arguments following the cdecl string: +</p> +<pre class="code"> +-- Declare a struct with a parameterized field type and name: +ffi.cdef([[ +typedef struct { $ $; } foo_t; +]], type1, name1) + +-- Anonymous struct with dynamic names: +local bar_t = ffi.typeof("struct { int $, $; }", name1, name2) +-- Derived pointer type: +local bar_ptr_t = ffi.typeof("$ *", bar_t) + +-- Parameterized dimensions work even where a VLA won't work: +local matrix_t = ffi.typeof("uint8_t[$][$]", width, height) +</pre> +<p> +Caveat: this is <em>not</em> simple text substitution! A passed ctype or +cdata object is treated like the underlying type, a passed string is +considered an identifier and a number is considered a number. You must +not mix this up: e.g. passing <tt>"int"</tt> as a string doesn't work in +place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead. +</p> +<p> +The main use for parameterized types is in libraries implementing abstract +data types +(<a href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8">example</a>), +similar to what can be achieved with C++ template metaprogramming. +Another use case is derived types of anonymous structs, which avoids +pollution of the global struct namespace. 
+</p> +<p> +Please note that parameterized types are a nice tool and indispensable +for certain use cases. But you'll want to use them sparingly in regular +code, e.g. when all types are actually fixed. +</p> + +<h2 id="gc">Garbage Collection of cdata Objects</h2> +<p> +All explicitly (<tt>ffi.new()</tt>, <tt>ffi.cast()</tt> etc.) or +implicitly (accessors) created cdata objects are garbage collected. +You need to make sure you retain valid references to cdata objects +somewhere on a Lua stack, an upvalue or in a Lua table while they are +still in use. Once the last reference to a cdata object is gone, the +garbage collector will automatically free the memory used by it (at +the end of the next GC cycle). +</p> +<p> +Please note that pointers themselves are cdata objects, however they +are <b>not</b> followed by the garbage collector. So e.g. if you +assign a cdata array to a pointer, you must keep the cdata object +holding the array alive as long as the pointer is still in use: +</p> +<pre class="code"> +ffi.cdef[[ +typedef struct { int *a; } foo_t; +]] + +local s = ffi.new("foo_t", ffi.new("int[10]")) -- <span style="color:#c00000;">WRONG!</span> + +local a = ffi.new("int[10]") -- <span style="color:#00a000;">OK</span> +local s = ffi.new("foo_t", a) +-- Now do something with 's', but keep 'a' alive until you're done. +</pre> +<p> +Similar rules apply for Lua strings which are implicitly converted to +<tt>"const char *"</tt>: the string object itself must be +referenced somewhere or it'll be garbage collected eventually. The +pointer will then point to stale data, which may have already been +overwritten. Note that <em>string literals</em> are automatically kept +alive as long as the function containing them (actually its prototype) +is not garbage collected. +</p> +<p> +Objects which are passed as an argument to an external C function +are kept alive until the call returns. So it's generally safe to +create temporary cdata objects in argument lists. 
This is a common +idiom for <a href="#convert_vararg">passing specific C types to +vararg functions</a>. +</p> +<p> +Memory areas returned by C functions (e.g. from <tt>malloc()</tt>) +must be manually managed, of course (or use +<a href="ext_ffi_api.html#ffi_gc"><tt>ffi.gc()</tt></a>). Pointers to +cdata objects are indistinguishable from pointers returned by C +functions (which is one of the reasons why the GC cannot follow them). +</p> + +<h2 id="callback">Callbacks</h2> +<p> +The LuaJIT FFI automatically generates special callback functions +whenever a Lua function is converted to a C function pointer. This +associates the generated callback function pointer with the C type +of the function pointer and the Lua function object (closure). +</p> +<p> +This can happen implicitly due to the usual conversions, e.g. when +passing a Lua function to a function pointer argument. Or you can use +<tt>ffi.cast()</tt> to explicitly cast a Lua function to a +C function pointer. +</p> +<p> +Currently only certain C function types can be used as callback +functions. Neither C vararg functions nor functions with +pass-by-value aggregate argument or result types are supported. There +are no restrictions for the kind of Lua functions that can be called +from the callback — no checks for the proper number of arguments +are made. The return value of the Lua function will be converted to the +result type and an error will be thrown for invalid conversions. +</p> +<p> +It's allowed to throw errors across a callback invocation, but it's not +advisable in general. Do this only if you know the C function, that +called the callback, copes with the forced stack unwinding and doesn't +leak resources. +</p> +<p> +One thing that's not allowed, is to let an FFI call into a C function +get JIT-compiled, which in turn calls a callback, calling into Lua again. +Usually this attempt is caught by the interpreter first and the +C function is blacklisted for compilation. 
+</p> +<p> +However, this heuristic may fail under specific circumstances: e.g. a +message polling function might not run Lua callbacks right away and the call +gets JIT-compiled. If it later happens to call back into Lua (e.g. a rarely +invoked error callback), you'll get a VM PANIC with the message +<tt>"bad callback"</tt>. Then you'll need to manually turn off +JIT-compilation with +<a href="ext_jit.html#jit_onoff_func"><tt>jit.off()</tt></a> for the +surrounding Lua function that invokes such a message polling function (or +similar). +</p> + +<h3 id="callback_resources">Callback resource handling</h3> +<p> +Callbacks take up resources — you can only have a limited number +of them at the same time (500 - 1000, depending on the +architecture). The associated Lua functions are anchored to prevent +garbage collection, too. +</p> +<p> +<b>Callbacks due to implicit conversions are permanent!</b> There is no +way to guess their lifetime, since the C side might store the +function pointer for later use (typical for GUI toolkits). The associated +resources cannot be reclaimed until termination: +</p> +<pre class="code"> +ffi.cdef[[ +typedef int (__stdcall *WNDENUMPROC)(void *hwnd, intptr_t l); +int EnumWindows(WNDENUMPROC func, intptr_t l); +]] + +-- Implicit conversion to a callback via function pointer argument. +local count = 0 +ffi.C.EnumWindows(function(hwnd, l) + count = count + 1 + return true +end, 0) +-- The callback is permanent and its resources cannot be reclaimed! +-- Ok, so this may not be a problem, if you do this only once. +</pre> +<p> +Note: this example shows that you <em>must</em> properly declare +<tt>__stdcall</tt> callbacks on Windows/x86 systems. The calling +convention cannot be automatically detected, unlike for +<tt>__stdcall</tt> calls <em>to</em> Windows functions. +</p> +<p> +For some use cases it's necessary to free up the resources or to +dynamically redirect callbacks. 
Use an explicit cast to a +C function pointer and keep the resulting cdata object. Then use +the <a href="ext_ffi_api.html#callback_free"><tt>cb:free()</tt></a> +or <a href="ext_ffi_api.html#callback_set"><tt>cb:set()</tt></a> methods +on the cdata object: +</p> +<pre class="code"> +-- Explicitly convert to a callback via cast. +local count = 0 +local cb = ffi.cast("WNDENUMPROC", function(hwnd, l) + count = count + 1 + return true +end) + +-- Pass it to a C function. +ffi.C.EnumWindows(cb, 0) +-- EnumWindows doesn't need the callback after it returns, so free it. + +cb:free() +-- The callback function pointer is no longer valid and its resources +-- will be reclaimed. The created Lua closure will be garbage collected. +</pre> + +<h3 id="callback_performance">Callback performance</h3> +<p> +<b>Callbacks are slow!</b> First, the C to Lua transition itself +has an unavoidable cost, similar to a <tt>lua_call()</tt> or +<tt>lua_pcall()</tt>. Argument and result marshalling add to that cost. +And finally, neither the C compiler nor LuaJIT can inline or +optimize across the language barrier and hoist repeated computations out +of a callback function. +</p> +<p> +Do not use callbacks for performance-sensitive work: e.g. consider a +numerical integration routine which takes a user-defined function to +integrate over. It's a bad idea to call a user-defined Lua function from +C code millions of times. The callback overhead will be absolutely +detrimental for performance. +</p> +<p> +It's considerably faster to write the numerical integration routine +itself in Lua — the JIT compiler will be able to inline the +user-defined function and optimize it together with its calling context, +with very competitive performance. +</p> +<p> +As a general guideline: <b>use callbacks only when you must</b>, because +of existing C APIs. E.g. callback performance is irrelevant for a +GUI application, which waits for user input most of the time, anyway. 
+</p> +<p> +For new designs <b>avoid push-style APIs</b>: a C function repeatedly +calling a callback for each result. Instead <b>use pull-style APIs</b>: +call a C function repeatedly to get a new result. Calls from Lua +to C via the FFI are much faster than the other way round. Most well-designed +libraries already use pull-style APIs (read/write, get/put). +</p> + +<h2 id="clib">C Library Namespaces</h2> +<p> +A C library namespace is a special kind of object which allows +access to the symbols contained in shared libraries or the default +symbol namespace. The default +<a href="ext_ffi_api.html#ffi_C"><tt>ffi.C</tt></a> namespace is +automatically created when the FFI library is loaded. C library +namespaces for specific shared libraries may be created with the +<a href="ext_ffi_api.html#ffi_load"><tt>ffi.load()</tt></a> API +function. +</p> +<p> +Indexing a C library namespace object with a symbol name (a Lua +string) automatically binds it to the library. First the symbol type +is resolved — it must have been declared with +<a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a>. Then the +symbol address is resolved by searching for the symbol name in the +associated shared libraries or the default symbol namespace. Finally, +the resulting binding between the symbol name, the symbol type and its +address is cached. Missing symbol declarations or nonexistent symbol +names cause an error. 
+</p> +<p> +This is what happens on a <b>read access</b> for the different kinds of +symbols: +</p> +<ul> + +<li>External functions: a cdata object with the type of the function +and its address is returned.</li> + +<li>External variables: the symbol address is dereferenced and the +loaded value is <a href="#convert_tolua">converted to a Lua object</a> +and returned.</li> + +<li>Constant values (<tt>static const</tt> or <tt>enum</tt> +constants): the constant is <a href="#convert_tolua">converted to a +Lua object</a> and returned.</li> + +</ul> +<p> +This is what happens on a <b>write access</b>: +</p> +<ul> + +<li>External variables: the value to be written is +<a href="#convert_fromlua">converted to the C type</a> of the +variable and then stored at the symbol address.</li> + +<li>Writing to constant variables or to any other symbol type causes +an error, like any other attempted write to a constant location.</li> + +</ul> +<p> +C library namespaces themselves are garbage collected objects. If +the last reference to the namespace object is gone, the garbage +collector will eventually release the shared library reference and +remove all memory associated with the namespace. Since this may +trigger the removal of the shared library from the memory of the +running process, it's generally <em>not safe</em> to use function +cdata objects obtained from a library if the namespace object may be +unreferenced. +</p> +<p> +Performance notice: the JIT compiler specializes to the identity of +namespace objects and to the strings used to index it. This +effectively turns function cdata objects into constants. It's not +useful and actually counter-productive to explicitly cache these +function objects, e.g. <tt>local strlen = ffi.C.strlen</tt>. OTOH it +<em>is</em> useful to cache the namespace itself, e.g. <tt>local C = +ffi.C</tt>. +</p> + +<h2 id="policy">No Hand-holding!</h2> +<p> +The FFI library has been designed as <b>a low-level library</b>. 
The +goal is to interface with C code and C data types with a +minimum of overhead. This means <b>you can do anything you can do +from C</b>: access all memory, overwrite anything in memory, call +machine code at any memory address and so on. +</p> +<p> +The FFI library provides <b>no memory safety</b>, unlike regular Lua +code. It will happily allow you to dereference a <tt>NULL</tt> +pointer, to access arrays out of bounds or to misdeclare +C functions. If you make a mistake, your application might crash, +just like equivalent C code would. +</p> +<p> +This behavior is inevitable, since the goal is to provide full +interoperability with C code. Adding extra safety measures, like +bounds checks, would be futile. There's no way to detect +misdeclarations of C functions, since shared libraries only +provide symbol names, but no type information. Likewise there's no way +to infer the valid range of indexes for a returned pointer. +</p> +<p> +Again: the FFI library is a low-level library. This implies it needs +to be used with care, but its flexibility and performance often +outweigh this concern. If you're a C or C++ developer, it'll be easy +to apply your existing knowledge. OTOH writing code for the FFI +library is not for the faint of heart and probably shouldn't be the +first exercise for someone with little experience in Lua, C or C++. +</p> +<p> +As a corollary of the above, the FFI library is <b>not safe for use by +untrusted Lua code</b>. If you're sandboxing untrusted Lua code, you +definitely don't want to give this code access to the FFI library or +to <em>any</em> cdata object (except 64 bit integers or complex +numbers). Any properly engineered Lua sandbox needs to provide safety +wrappers for many of the standard Lua library functions — +similar wrappers need to be written for high-level operations on FFI +data types, too. 
+</p> + +<h2 id="status">Current Status</h2> +<p> +The initial release of the FFI library has some limitations and is +missing some features. Most of these will be fixed in future releases. +</p> +<p> +<a href="#clang">C language support</a> is +currently incomplete: +</p> +<ul> +<li>C declarations are not passed through a C pre-processor, +yet.</li> +<li>The C parser is able to evaluate most constant expressions +commonly found in C header files. However it doesn't handle the +full range of C expression semantics and may fail for some +obscure constructs.</li> +<li><tt>static const</tt> declarations only work for integer types +up to 32 bits. Neither declaring string constants nor +floating-point constants is supported.</li> +<li>Packed <tt>struct</tt> bitfields that cross container boundaries +are not implemented.</li> +<li>Native vector types may be defined with the GCC <tt>mode</tt> or +<tt>vector_size</tt> attribute. But no operations other than loading, +storing and initializing them are supported, yet.</li> +<li>The <tt>volatile</tt> type qualifier is currently ignored by +compiled code.</li> +<li><a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a> silently +ignores most re-declarations. Note: avoid re-declarations which do not +conform to C99. The implementation will eventually be changed to +perform strict checks.</li> +</ul> +<p> +The JIT compiler already handles a large subset of all FFI operations. +It automatically falls back to the interpreter for unimplemented +operations (you can check for this with the +<a href="running.html#opt_j"><tt>-jv</tt></a> command line option). 
+The following operations are currently not compiled and may exhibit +suboptimal performance, especially when used in inner loops: +</p> +<ul> +<li>Vector operations.</li> +<li>Table initializers.</li> +<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> +<li>Non-default initialization of VLA/VLS or large C types +(> 128 bytes or > 16 array elements).</li> +<li>Bitfield initializations.</li> +<li>Pointer differences for element sizes that are not a power of +two.</li> +<li>Calls to C functions with aggregates passed or returned by +value.</li> +<li>Calls to ctype metamethods which are not plain functions.</li> +<li>ctype <tt>__newindex</tt> tables and non-string lookups in ctype +<tt>__index</tt> tables.</li> +<li><tt>tostring()</tt> for cdata types.</li> +<li>Calls to <tt>ffi.cdef()</tt>, <tt>ffi.load()</tt> and +<tt>ffi.metatype()</tt>.</li> +</ul> +<p> +Other missing features: +</p> +<ul> +<li>Arithmetic for <tt>complex</tt> numbers.</li> +<li>Passing structs by value to vararg C functions.</li> +<li><a href="extensions.html#exceptions">C++ exception interoperability</a> +does not extend to C functions called via the FFI, if the call is +compiled.</li> +</ul> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_ffi_tutorial.html b/lib/LuaJIT/doc/ext_ffi_tutorial.html new file mode 100644 index 0000000..e0b0821 --- /dev/null +++ b/lib/LuaJIT/doc/ext_ffi_tutorial.html @@ -0,0 +1,602 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>FFI Tutorial</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" 
type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +table.idiomtable { font-size: 90%; line-height: 1.2; } +table.idiomtable tt { font-size: 100%; } +table.idiomtable td { vertical-align: top; } +tr.idiomhead td { font-weight: bold; } +td.idiomlua b { font-weight: normal; color: #2142bf; } +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>FFI Tutorial</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a class="current" href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +This page is intended to give you an overview of the features of the FFI +library by presenting a few use cases and guidelines. +</p> +<p> +This page makes no attempt to explain all of the FFI library, though. 
+You'll want to have a look at the <a href="ext_ffi_api.html">ffi.* API +function reference</a> and the <a href="ext_ffi_semantics.html">FFI +semantics</a> to learn more. +</p> + +<h2 id="load">Loading the FFI Library</h2> +<p> +The FFI library is built into LuaJIT by default, but it's not loaded +and initialized by default. The suggested way to use the FFI library +is to add the following to the start of every Lua file that needs one +of its functions: +</p> +<pre class="code"> +local ffi = require("ffi") +</pre> +<p> +Please note this doesn't define an <tt>ffi</tt> variable in the table +of globals — you really need to use the local variable. The +<tt>require</tt> function ensures the library is only loaded once. +</p> +<p style="font-size: 8pt;"> +Note: If you want to experiment with the FFI from the interactive prompt +of the command line executable, omit the <tt>local</tt>, as it doesn't +preserve local variables across lines. +</p> + +<h2 id="sleep">Accessing Standard System Functions</h2> +<p> +The following code explains how to access standard system functions. +We slowly print two lines of dots by sleeping for 10 milliseconds +after each dot: +</p> +<pre class="code mark"> +<span class="codemark"> +① + + + + + +② +③ +④ + + + +⑤ + + + + + +⑥</span>local ffi = require("ffi") +ffi.cdef[[ +<span style="color:#00a000;">void Sleep(int ms); +int poll(struct pollfd *fds, unsigned long nfds, int timeout);</span> +]] + +local sleep +if ffi.os == "Windows" then + function sleep(s) + ffi.C.Sleep(s*1000) + end +else + function sleep(s) + ffi.C.poll(nil, 0, s*1000) + end +end + +for i=1,160 do + io.write("."); io.flush() + sleep(0.01) +end +io.write("\n") +</pre> +<p> +Here's the step-by-step explanation: +</p> +<p> +<span class="mark">①</span> This defines the +C library functions we're going to use. The part inside the +double-brackets (in green) is just standard C syntax. 
You can +usually get this info from the C header files or the +documentation provided by each C library or C compiler. +</p> +<p> +<span class="mark">②</span> The difficulty we're +facing here, is that there are different standards to choose from. +Windows has a simple <tt>Sleep()</tt> function. On other systems there +are a variety of functions available to achieve sub-second sleeps, but +with no clear consensus. Thankfully <tt>poll()</tt> can be used for +this task, too, and it's present on most non-Windows systems. The +check for <tt>ffi.os</tt> makes sure we use the Windows-specific +function only on Windows systems. +</p> +<p> +<span class="mark">③</span> Here we're wrapping the +call to the C function in a Lua function. This isn't strictly +necessary, but it's helpful to deal with system-specific issues only +in one part of the code. The way we're wrapping it ensures the check +for the OS is only done during initialization and not for every call. +</p> +<p> +<span class="mark">④</span> A more subtle point is +that we defined our <tt>sleep()</tt> function (for the sake of this +example) as taking the number of seconds, but accepting fractional +seconds. Multiplying this by 1000 gets us milliseconds, but that still +leaves it a Lua number, which is a floating-point value. Alas, the +<tt>Sleep()</tt> function only accepts an integer value. Luckily for +us, the FFI library automatically performs the conversion when calling +the function (truncating the FP value towards zero, like in C). +</p> +<p style="font-size: 8pt;"> +Some readers will notice that <tt>Sleep()</tt> is part of +<tt>KERNEL32.DLL</tt> and is also a <tt>stdcall</tt> function. So how +can this possibly work? The FFI library provides the <tt>ffi.C</tt> +default C library namespace, which allows calling functions from +the default set of libraries, like a C compiler would. Also, the +FFI library automatically detects <tt>stdcall</tt> functions, so you +don't need to declare them as such. 
+</p> +<p> +<span class="mark">⑤</span> The <tt>poll()</tt> +function takes a couple more arguments we're not going to use. You can +simply use <tt>nil</tt> to pass a <tt>NULL</tt> pointer and <tt>0</tt> +for the <tt>nfds</tt> parameter. Please note that the +number <tt>0</tt> <em>does not convert to a pointer value</em>, +unlike in C++. You really have to pass pointers to pointer arguments +and numbers to number arguments. +</p> +<p style="font-size: 8pt;"> +The page on <a href="ext_ffi_semantics.html">FFI semantics</a> has all +of the gory details about +<a href="ext_ffi_semantics.html#convert">conversions between Lua +objects and C types</a>. For the most part you don't have to deal +with this, as it's performed automatically and it's carefully designed +to bridge the semantic differences between Lua and C. +</p> +<p> +<span class="mark">⑥</span> Now that we have defined +our own <tt>sleep()</tt> function, we can just call it from plain Lua +code. That wasn't so bad, huh? Turning these boring animated dots into +a fascinating best-selling game is left as an exercise for the reader. +:-) +</p> + +<h2 id="zlib">Accessing the zlib Compression Library</h2> +<p> +The following code shows how to access the <a +href="http://zlib.net/">zlib</a> compression library from Lua code. 
+We'll define two convenience wrapper functions that take a string and +compress or uncompress it to another string: +</p> +<pre class="code mark"> +<span class="codemark"> +① + + + + + + +② + + +③ + +④ + + +⑤ + + +⑥ + + + + + + + +⑦</span>local ffi = require("ffi") +ffi.cdef[[ +<span style="color:#00a000;">unsigned long compressBound(unsigned long sourceLen); +int compress2(uint8_t *dest, unsigned long *destLen, + const uint8_t *source, unsigned long sourceLen, int level); +int uncompress(uint8_t *dest, unsigned long *destLen, + const uint8_t *source, unsigned long sourceLen);</span> +]] +local zlib = ffi.load(ffi.os == "Windows" and "zlib1" or "z") + +local function compress(txt) + local n = zlib.compressBound(#txt) + local buf = ffi.new("uint8_t[?]", n) + local buflen = ffi.new("unsigned long[1]", n) + local res = zlib.compress2(buf, buflen, txt, #txt, 9) + assert(res == 0) + return ffi.string(buf, buflen[0]) +end + +local function uncompress(comp, n) + local buf = ffi.new("uint8_t[?]", n) + local buflen = ffi.new("unsigned long[1]", n) + local res = zlib.uncompress(buf, buflen, comp, #comp) + assert(res == 0) + return ffi.string(buf, buflen[0]) +end + +-- Simple test code. +local txt = string.rep("abcd", 1000) +print("Uncompressed size: ", #txt) +local c = compress(txt) +print("Compressed size: ", #c) +local txt2 = uncompress(c, #txt) +assert(txt2 == txt) +</pre> +<p> +Here's the step-by-step explanation: +</p> +<p> +<span class="mark">①</span> This defines some of the +C functions provided by zlib. For the sake of this example, some +type indirections have been reduced and it uses the pre-defined +fixed-size integer types, while still adhering to the zlib API/ABI. +</p> +<p> +<span class="mark">②</span> This loads the zlib shared +library. On POSIX systems it's named <tt>libz.so</tt> and usually +comes pre-installed. Since <tt>ffi.load()</tt> automatically adds any +missing standard prefixes/suffixes, we can simply load the +<tt>"z"</tt> library. 
On Windows it's named <tt>zlib1.dll</tt> and +you'll have to download it first from the +<a href="http://zlib.net/"><span class="ext">»</span> zlib site</a>. The check for +<tt>ffi.os</tt> makes sure we pass the right name to +<tt>ffi.load()</tt>. +</p> +<p> +<span class="mark">③</span> First, the maximum size of +the compression buffer is obtained by calling the +<tt>zlib.compressBound</tt> function with the length of the +uncompressed string. The next line allocates a byte buffer of this +size. The <tt>[?]</tt> in the type specification indicates a +variable-length array (VLA). The actual number of elements of this +array is given as the 2nd argument to <tt>ffi.new()</tt>. +</p> +<p> +<span class="mark">④</span> This may look strange at +first, but have a look at the declaration of the <tt>compress2</tt> +function from zlib: the destination length is defined as a pointer! +This is because you pass in the maximum buffer size and get back the +actual length that was used. +</p> +<p> +In C you'd pass in the address of a local variable +(<tt>&buflen</tt>). But since there's no address-of operator in +Lua, we'll just pass in a one-element array. Conveniently it can be +initialized with the maximum buffer size in one step. Calling the +actual <tt>zlib.compress2</tt> function is then straightforward. +</p> +<p> +<span class="mark">⑤</span> We want to return the +compressed data as a Lua string, so we'll use <tt>ffi.string()</tt>. +It needs a pointer to the start of the data and the actual length. The +length has been returned in the <tt>buflen</tt> array, so we'll just +get it from there. +</p> +<p style="font-size: 8pt;"> +Note that since the function returns now, the <tt>buf</tt> and +<tt>buflen</tt> variables will eventually be garbage collected. This +is fine, because <tt>ffi.string()</tt> has copied the contents to a +newly created (interned) Lua string. 
If you plan to call this function +lots of times, consider reusing the buffers and/or handing back the +results in buffers instead of strings. This will reduce the overhead +for garbage collection and string interning. +</p> +<p> +<span class="mark">⑥</span> The <tt>uncompress</tt> +function does the exact opposite of the <tt>compress</tt> function. +The compressed data doesn't include the size of the original string, +so this needs to be passed in. Otherwise no surprises here. +</p> +<p> +<span class="mark">⑦</span> The code, that makes use +of the functions we just defined, is just plain Lua code. It doesn't +need to know anything about the LuaJIT FFI — the convenience +wrapper functions completely hide it. +</p> +<p> +One major advantage of the LuaJIT FFI is that you are now able to +write those wrappers <em>in Lua</em>. And at a fraction of the time it +would cost you to create an extra C module using the Lua/C API. +Many of the simpler C functions can probably be used directly +from your Lua code, without any wrappers. +</p> +<p style="font-size: 8pt;"> +Side note: the zlib API uses the <tt>long</tt> type for passing +lengths and sizes around. But all those zlib functions actually only +deal with 32 bit values. This is an unfortunate choice for a +public API, but may be explained by zlib's history — we'll just +have to deal with it. +</p> +<p style="font-size: 8pt;"> +First, you should know that a <tt>long</tt> is a 64 bit type e.g. +on POSIX/x64 systems, but a 32 bit type on Windows/x64 and on +32 bit systems. Thus a <tt>long</tt> result can be either a plain +Lua number or a boxed 64 bit integer cdata object, depending on +the target system. +</p> +<p style="font-size: 8pt;"> +Ok, so the <tt>ffi.*</tt> functions generally accept cdata objects +wherever you'd want to use a number. That's why we get away with +passing <tt>n</tt> to <tt>ffi.string()</tt> above. But other Lua +library functions or modules don't know how to deal with this. 
So for +maximum portability one needs to use <tt>tonumber()</tt> on returned +<tt>long</tt> results before passing them on. Otherwise the +application might work on some systems, but would fail in a POSIX/x64 +environment. +</p> + +<h2 id="metatype">Defining Metamethods for a C Type</h2> +<p> +The following code explains how to define metamethods for a C type. +We define a simple point type and add some operations to it: +</p> +<pre class="code mark"> +<span class="codemark"> +① + + + +② + +③ + +④ + + + +⑤ + +⑥</span>local ffi = require("ffi") +ffi.cdef[[ +<span style="color:#00a000;">typedef struct { double x, y; } point_t;</span> +]] + +local point +local mt = { + __add = function(a, b) return point(a.x+b.x, a.y+b.y) end, + __len = function(a) return math.sqrt(a.x*a.x + a.y*a.y) end, + __index = { + area = function(a) return a.x*a.x + a.y*a.y end, + }, +} +point = ffi.metatype("point_t", mt) + +local a = point(3, 4) +print(a.x, a.y) --> 3 4 +print(#a) --> 5 +print(a:area()) --> 25 +local b = a + point(0.5, 8) +print(#b) --> 12.5 +</pre> +<p> +Here's the step-by-step explanation: +</p> +<p> +<span class="mark">①</span> This defines the C type for a +two-dimensional point object. +</p> +<p> +<span class="mark">②</span> We have to declare the variable +holding the point constructor first, because it's used inside of a +metamethod. +</p> +<p> +<span class="mark">③</span> Let's define an <tt>__add</tt> +metamethod which adds the coordinates of two points and creates a new +point object. For simplicity, this function assumes that both arguments +are points. But it could be any mix of objects, if at least one operand +is of the required type (e.g. adding a point plus a number or vice +versa). Our <tt>__len</tt> metamethod returns the distance of a point to +the origin. +</p> +<p> +<span class="mark">④</span> If we run out of operators, we can +define named methods, too. Here the <tt>__index</tt> table defines an +<tt>area</tt> function. 
For custom indexing needs, one might want to +define <tt>__index</tt> and <tt>__newindex</tt> <em>functions</em> instead. +</p> +<p> +<span class="mark">⑤</span> This associates the metamethods with +our C type. This only needs to be done once. For convenience, a +constructor is returned by +<a href="ext_ffi_api.html#ffi_metatype"><tt>ffi.metatype()</tt></a>. +We're not required to use it, though. The original C type can still +be used e.g. to create an array of points. The metamethods automatically +apply to any and all uses of this type. +</p> +<p> +Please note that the association with a metatable is permanent and +<b>the metatable must not be modified afterwards!</b> Ditto for the +<tt>__index</tt> table. +</p> +<p> +<span class="mark">⑥</span> Here are some simple usage examples +for the point type and their expected results. The pre-defined +operations (such as <tt>a.x</tt>) can be freely mixed with the newly +defined metamethods. Note that <tt>area</tt> is a method and must be +called with the Lua syntax for methods: <tt>a:area()</tt>, not +<tt>a.area()</tt>. +</p> +<p> +The C type metamethod mechanism is most useful when used in +conjunction with C libraries that are written in an object-oriented +style. Creators return a pointer to a new instance and methods take an +instance pointer as the first argument. Sometimes you can just point +<tt>__index</tt> to the library namespace and <tt>__gc</tt> to the +destructor and you're done. But often enough you'll want to add +convenience wrappers, e.g. to return actual Lua strings or when +returning multiple values. +</p> +<p> +Some C libraries only declare instance pointers as an opaque +<tt>void *</tt> type. In this case you can use a fake type for all +declarations, e.g. a pointer to a named (incomplete) struct will do: +<tt>typedef struct foo_type *foo_handle</tt>. The C side doesn't +know what you declare with the LuaJIT FFI, but as long as the underlying +types are compatible, everything still works. 
+</p> + +<h2 id="idioms">Translating C Idioms</h2> +<p> +Here's a list of common C idioms and their translation to the +LuaJIT FFI: +</p> +<table class="idiomtable"> +<tr class="idiomhead"> +<td class="idiomdesc">Idiom</td> +<td class="idiomc">C code</td> +<td class="idiomlua">Lua code</td> +</tr> +<tr class="odd separate"> +<td class="idiomdesc">Pointer dereference<br><tt>int *p;</tt></td><td class="idiomc"><tt>x = *p;<br>*p = y;</tt></td><td class="idiomlua"><tt>x = <b>p[0]</b><br><b>p[0]</b> = y</tt></td></tr> +<tr class="even"> +<td class="idiomdesc">Pointer indexing<br><tt>int i, *p;</tt></td><td class="idiomc"><tt>x = p[i];<br>p[i+1] = y;</tt></td><td class="idiomlua"><tt>x = p[i]<br>p[i+1] = y</tt></td></tr> +<tr class="odd"> +<td class="idiomdesc">Array indexing<br><tt>int i, a[];</tt></td><td class="idiomc"><tt>x = a[i];<br>a[i+1] = y;</tt></td><td class="idiomlua"><tt>x = a[i]<br>a[i+1] = y</tt></td></tr> +<tr class="even separate"> +<td class="idiomdesc"><tt>struct</tt>/<tt>union</tt> dereference<br><tt>struct foo s;</tt></td><td class="idiomc"><tt>x = s.field;<br>s.field = y;</tt></td><td class="idiomlua"><tt>x = s.field<br>s.field = y</tt></td></tr> +<tr class="odd"> +<td class="idiomdesc"><tt>struct</tt>/<tt>union</tt> pointer deref.<br><tt>struct foo *sp;</tt></td><td class="idiomc"><tt>x = sp->field;<br>sp->field = y;</tt></td><td class="idiomlua"><tt>x = <b>sp.field</b><br><b>sp.field</b> = y</tt></td></tr> +<tr class="even separate"> +<td class="idiomdesc">Pointer arithmetic<br><tt>int i, *p;</tt></td><td class="idiomc"><tt>x = p + i;<br>y = p - i;</tt></td><td class="idiomlua"><tt>x = p + i<br>y = p - i</tt></td></tr> +<tr class="odd"> +<td class="idiomdesc">Pointer difference<br><tt>int *p1, *p2;</tt></td><td class="idiomc"><tt>x = p1 - p2;</tt></td><td class="idiomlua"><tt>x = p1 - p2</tt></td></tr> +<tr class="even"> +<td class="idiomdesc">Array element pointer<br><tt>int i, a[];</tt></td><td class="idiomc"><tt>x = &a[i];</tt></td><td 
class="idiomlua"><tt>x = <b>a+i</b></tt></td></tr> +<tr class="odd"> +<td class="idiomdesc">Cast pointer to address<br><tt>int *p;</tt></td><td class="idiomc"><tt>x = (intptr_t)p;</tt></td><td class="idiomlua"><tt>x = <b>tonumber(<br> ffi.cast("intptr_t",<br> p))</b></tt></td></tr> +<tr class="even separate"> +<td class="idiomdesc">Functions with outargs<br><tt>void foo(int *inoutlen);</tt></td><td class="idiomc"><tt>int len = x;<br>foo(&len);<br>y = len;</tt></td><td class="idiomlua"><tt><b>local len =<br> ffi.new("int[1]", x)<br>foo(len)<br>y = len[0]</b></tt></td></tr> +<tr class="odd"> +<td class="idiomdesc"><a href="ext_ffi_semantics.html#convert_vararg">Vararg conversions</a><br><tt>int printf(char *fmt, ...);</tt></td><td class="idiomc"><tt>printf("%g", 1.0);<br>printf("%d", 1);<br> </tt></td><td class="idiomlua"><tt>printf("%g", 1);<br>printf("%d",<br> <b>ffi.new("int", 1)</b>)</tt></td></tr> +</table> + +<h2 id="cache">To Cache or Not to Cache</h2> +<p> +It's a common Lua idiom to cache library functions in local variables +or upvalues, e.g.: +</p> +<pre class="code"> +local byte, char = string.byte, string.char +local function foo(x) + return char(byte(x)+1) +end +</pre> +<p> +This replaces several hash-table lookups with a (faster) direct use of +a local or an upvalue. This is less important with LuaJIT, since the +JIT compiler optimizes hash-table lookups a lot and is even able to +hoist most of them out of the inner loops. It can't eliminate +<em>all</em> of them, though, and it saves some typing for often-used +functions. So there's still a place for this, even with LuaJIT. +</p> +<p> +The situation is a bit different with C function calls via the +FFI library. The JIT compiler has special logic to eliminate <em>all +of the lookup overhead</em> for functions resolved from a +<a href="ext_ffi_semantics.html#clib">C library namespace</a>! 
+Thus it's not helpful and actually counter-productive to cache +individual C functions like this: +</p> +<pre class="code"> +local <b>funca</b>, <b>funcb</b> = ffi.C.funca, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span> +local function foo(x, n) + for i=1,n do <b>funcb</b>(<b>funca</b>(x, i), 1) end +end +</pre> +<p> +This turns them into indirect calls and generates bigger and slower +machine code. Instead you'll want to cache the namespace itself and +rely on the JIT compiler to eliminate the lookups: +</p> +<pre class="code"> +local <b>C</b> = ffi.C -- <span style="color:#00a000;">Instead use this!</span> +local function foo(x, n) + for i=1,n do <b>C.funcb</b>(<b>C.funca</b>(x, i), 1) end +end +</pre> +<p> +This generates both shorter and faster code. So <b>don't cache +C functions</b>, but <b>do</b> cache namespaces! Most often the +namespace is already in a local variable at an outer scope, e.g. from +<tt>local lib = ffi.load(...)</tt>. Note that copying +it to a local variable in the function scope is unnecessary. 
+</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_jit.html b/lib/LuaJIT/doc/ext_jit.html new file mode 100644 index 0000000..73cd3c2 --- /dev/null +++ b/lib/LuaJIT/doc/ext_jit.html @@ -0,0 +1,200 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>jit.* Library</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1><tt>jit.*</tt> Library</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a class="current" href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> 
+</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +The functions in this built-in module control the behavior of the JIT +compiler engine. Note that JIT-compilation is fully automatic — +you probably won't need to use any of the following functions unless +you have special needs. +</p> + +<h3 id="jit_onoff"><tt>jit.on()<br> +jit.off()</tt></h3> +<p> +Turns the whole JIT compiler on (default) or off. +</p> +<p> +These functions are typically used with the command line options +<tt>-j on</tt> or <tt>-j off</tt>. +</p> + +<h3 id="jit_flush"><tt>jit.flush()</tt></h3> +<p> +Flushes the whole cache of compiled code. +</p> + +<h3 id="jit_onoff_func"><tt>jit.on(func|true [,true|false])<br> +jit.off(func|true [,true|false])<br> +jit.flush(func|true [,true|false])</tt></h3> +<p> +<tt>jit.on</tt> enables JIT compilation for a Lua function (this is +the default). +</p> +<p> +<tt>jit.off</tt> disables JIT compilation for a Lua function and +flushes any already compiled code from the code cache. +</p> +<p> +<tt>jit.flush</tt> flushes the code, but doesn't affect the +enable/disable status. +</p> +<p> +The current function, i.e. the Lua function calling this library +function, can also be specified by passing <tt>true</tt> as the first +argument. +</p> +<p> +If the second argument is <tt>true</tt>, JIT compilation is also +enabled, disabled or flushed recursively for all sub-functions of a +function. With <tt>false</tt> only the sub-functions are affected. +</p> +<p> +The <tt>jit.on</tt> and <tt>jit.off</tt> functions only set a flag +which is checked when the function is about to be compiled. They do +not trigger immediate compilation. +</p> +<p> +Typical usage is <tt>jit.off(true, true)</tt> in the main chunk +of a module to turn off JIT compilation for the whole module for +debugging purposes. 
+</p> + +<h3 id="jit_flush_tr"><tt>jit.flush(tr)</tt></h3> +<p> +Flushes the root trace, specified by its number, and all of its side +traces from the cache. The code for the trace will be retained as long +as there are any other traces which link to it. +</p> + +<h3 id="jit_status"><tt>status, ... = jit.status()</tt></h3> +<p> +Returns the current status of the JIT compiler. The first result is +either <tt>true</tt> or <tt>false</tt> if the JIT compiler is turned +on or off. The remaining results are strings for CPU-specific features +and enabled optimizations. +</p> + +<h3 id="jit_version"><tt>jit.version</tt></h3> +<p> +Contains the LuaJIT version string. +</p> + +<h3 id="jit_version_num"><tt>jit.version_num</tt></h3> +<p> +Contains the version number of the LuaJIT core. Version xx.yy.zz +is represented by the decimal number xxyyzz. +</p> + +<h3 id="jit_os"><tt>jit.os</tt></h3> +<p> +Contains the target OS name: +"Windows", "Linux", "OSX", "BSD", "POSIX" or "Other". +</p> + +<h3 id="jit_arch"><tt>jit.arch</tt></h3> +<p> +Contains the target architecture name: +"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64". +</p> + +<h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> +<p> +This sub-module provides the backend for the <tt>-O</tt> command line +option. +</p> +<p> +You can also use it programmatically, e.g.: +</p> +<pre class="code"> +jit.opt.start(2) -- same as -O2 +jit.opt.start("-dce") +jit.opt.start("hotloop=10", "hotexit=2") +</pre> +<p> +Unlike in LuaJIT 1.x, the module is built-in and +<b>optimization is turned on by default!</b> +It's no longer necessary to run <tt>require("jit.opt").start()</tt>, +which was one of the ways to enable optimization. +</p> + +<h2 id="jit_util"><tt>jit.util.*</tt> — JIT compiler introspection</h2> +<p> +This sub-module holds functions to introspect the bytecode, generated +traces, the IR and the generated machine code. 
The functionality +provided by this module is still in flux and therefore undocumented. +</p> +<p> +The debug modules <tt>-jbc</tt>, <tt>-jv</tt> and <tt>-jdump</tt> make +extensive use of these functions. Please check out their source code, +if you want to know more. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/ext_profiler.html b/lib/LuaJIT/doc/ext_profiler.html new file mode 100644 index 0000000..d34ce6d --- /dev/null +++ b/lib/LuaJIT/doc/ext_profiler.html @@ -0,0 +1,364 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Profiler</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Profiler</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a class="current" 
href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT has an integrated statistical profiler with very low overhead. It +allows sampling the currently executing stack and other parameters in +regular intervals. +</p> +<p> +The integrated profiler can be accessed from three levels: +</p> +<ul> +<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the +<a href="#j_p"><tt>-jp</tt></a> command line option.</li> +<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li> +<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li> +</ul> + +<h2 id="hl_profiler">High-Level Profiler</h2> +<p> +The bundled high-level profiler offers basic profiling functionality. It +generates simple textual summaries or source code annotations. It can be +accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option +or from Lua code by loading the underlying <tt>jit.p</tt> module. +</p> +<p> +To cut to the chase — run this to get a CPU usage profile by +function name: +</p> +<pre class="code"> +luajit -jp myapp.lua +</pre> +<p> +It's <em>not</em> a stated goal of the bundled profiler to add every +possible option or to cater for special profiling needs. The low-level +profiler APIs are documented below. They may be used by third-party +authors to implement advanced functionality, e.g. IDE integration or +graphical profilers. +</p> +<p> +Note: Sampling works for both interpreted and JIT-compiled code. The +results for JIT-compiled code may sometimes be surprising. 
LuaJIT +heavily optimizes and inlines Lua code — there's no simple +one-to-one correspondence between source code lines and the sampled +machine code. +</p> + +<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3> +<p> +The <tt>-jp</tt> command line option starts the high-level profiler. +When the application run by the command line terminates, the profiler +stops and writes the results to <tt>stdout</tt> or to the specified +<tt>output</tt> file. +</p> +<p> +The <tt>options</tt> argument specifies how the profiling is to be +performed: +</p> +<ul> +<li><tt>f</tt> — Stack dump: function name, otherwise module:line. +This is the default mode.</li> +<li><tt>F</tt> — Stack dump: ditto, but dump module:name.</li> +<li><tt>l</tt> — Stack dump: module:line.</li> +<li><tt><number></tt> — stack dump depth (callee ← +caller). Default: 1.</li> +<li><tt>-<number></tt> — Inverse stack dump depth (caller +→ callee).</li> +<li><tt>s</tt> — Split stack dump after first stack level. Implies +depth ≥ 2 or depth ≤ -2.</li> +<li><tt>p</tt> — Show full path for module names.</li> +<li><tt>v</tt> — Show VM states.</li> +<li><tt>z</tt> — Show <a href="#jit_zone">zones</a>.</li> +<li><tt>r</tt> — Show raw sample counts. Default: show percentages.</li> +<li><tt>a</tt> — Annotate excerpts from source code files.</li> +<li><tt>A</tt> — Annotate complete source code files.</li> +<li><tt>G</tt> — Produce raw output suitable for graphical tools.</li> +<li><tt>m<number></tt> — Minimum sample percentage to be shown. +Default: 3%.</li> +<li><tt>i<number></tt> — Sampling interval in milliseconds. +Default: 10ms.<br> +Note: The actual sampling precision is OS-dependent.</li> +</ul> +<p> +The default output for <tt>-jp</tt> is a list of the most CPU consuming +spots in the application. Increasing the stack dump depth with (say) +<tt>-jp=2</tt> may help to point out the main callers or callees of +hotspots. But sample aggregation is still flat per unique stack dump. 
+</p> +<p> +To get a two-level view (split view) of callers/callees, use +<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second +level are relative to the first level. +</p> +<p> +To see how much time is spent in each line relative to a function, use +<tt>-jp=fl</tt>. +</p> +<p> +To see how much time is spent in different VM states or +<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>. +</p> +<p> +Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level +views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time +spent in a VM state or zone vs. hotspots. This can be used to answer +questions like "Which time consuming functions are only interpreted?" or +"What's the garbage collector overhead for a specific function?". +</p> +<p> +Multiple options can be combined — but not all combinations make +sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels +deep in 4ms intervals and shows a split view of the CPU consuming +functions and their callers with a 1% threshold. +</p> +<p> +Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are +always flat and at the line level. Obviously, the source code files need +to be readable by the profiler script. +</p> +<p> +The high-level profiler can also be started and stopped from Lua code with: +</p> +<pre class="code"> +require("jit.p").start(options, output) +... +require("jit.p").stop() +</pre> + +<h3 id="jit_zone"><tt>jit.zone</tt> — Zones</h3> +<p> +Zones can be used to provide information about different parts of an +application to the high-level profiler. E.g. a game could make use of an +<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical, +organized as a stack. 
+</p> +<p> +The <tt>jit.zone</tt> module needs to be loaded explicitly: +</p> +<pre class="code"> +local zone = require("jit.zone") +</pre> +<ul> +<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li> +<li><tt>zone()</tt> pops the current zone from the zone stack and +returns its name.</li> +<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li> +<li><tt>zone:flush()</tt> flushes the zone stack.</li> +</ul> +<p> +To show the time spent in each zone use <tt>-jp=z</tt>. To show the time +spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>. +</p> + +<h2 id="ll_lua_api">Low-level Lua API</h2> +<p> +The <tt>jit.profile</tt> module gives access to the low-level API of the +profiler from Lua code. This module needs to be loaded explicitly: +<pre class="code"> +local profile = require("jit.profile") +</pre> +<p> +This module can be used to implement your own higher-level profiler. +A typical profiling run starts the profiler, captures stack dumps in +the profiler callback, adds them to a hash table to aggregate the number +of samples, stops the profiler and then analyzes all of the captured +stack dumps. Other parameters can be sampled in the profiler callback, +too. But it's important not to spend too much time in the callback, +since this may skew the statistics. +</p> + +<h3 id="profile_start"><tt>profile.start(mode, cb)</tt> +— Start profiler</h3> +<p> +This function starts the profiler. The <tt>mode</tt> argument is a +string holding options: +</p> +<ul> +<li><tt>f</tt> — Profile with precision down to the function level.</li> +<li><tt>l</tt> — Profile with precision down to the line level.</li> +<li><tt>i<number></tt> — Sampling interval in milliseconds (default +10ms).</br> +Note: The actual sampling precision is OS-dependent. +</li> +</ul> +<p> +The <tt>cb</tt> argument is a callback function which is called with +three arguments: <tt>(thread, samples, vmstate)</tt>. 
The callback is +called on a separate coroutine; the <tt>thread</tt> argument is the +state that holds the stack to sample for profiling.
The second example prints +semicolon-separated function names for all frames (up to 100) in inverse +order: +</p> +<pre class="code"> +print(profile.dumpstack(thread, "l\n", 10)) +print(profile.dumpstack(thread, "lZ;", -100)) +</pre> + +<h2 id="ll_c_api">Low-level C API</h2> +<p> +The profiler can be controlled directly from C code, e.g. for +use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see +<a href="ext_c_api.html">Lua/C API</a> extensions). +</p> + +<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt> +— Start profiler</h3> +<p> +This function starts the profiler. <a href="#profile_start">See +above</a> for a description of the <tt>mode</tt> argument. +</p> +<p> +The <tt>cb</tt> argument is a callback function with the following +declaration: +</p> +<pre class="code"> +typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, + int samples, int vmstate); +</pre> +<p> +<tt>data</tt> is available for use by the callback. <tt>L</tt> is the +state that holds the stack to sample for profiling. Note: do +<em>not</em> modify this stack or call functions on this stack — +use a separate coroutine for this purpose. <a href="#profile_start">See +above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>. +</p> + +<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt> +— Stop profiler</h3> +<p> +This function stops the profiler. +</p> + +<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt> +— Dump stack </h3> +<p> +This function allows taking stack dumps in an efficient manner. +<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt> +and <tt>depth</tt>. +</p> +<p> +This function returns a <tt>const char *</tt> pointing to a +private string buffer of the profiler. The <tt>int *len</tt> +argument returns the length of the output string. The buffer is +overwritten on the next call and deallocated when the profiler stops. 
+You either need to consume the content immediately or copy it for later +use. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/extensions.html b/lib/LuaJIT/doc/extensions.html new file mode 100644 index 0000000..7379041 --- /dev/null +++ b/lib/LuaJIT/doc/extensions.html @@ -0,0 +1,482 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Extensions</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +table.exc { + line-height: 1.2; +} +tr.exchead td { + font-weight: bold; +} +td.excplatform { + width: 48%; +} +td.exccompiler { + width: 29%; +} +td.excinterop { + width: 23%; +} +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Extensions</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a class="current" href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a 
href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT is fully upwards-compatible with Lua 5.1. It supports all +<a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">»</span> standard Lua +library functions</a> and the full set of +<a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">»</span> Lua/C API +functions</a>. +</p> +<p> +LuaJIT is also fully ABI-compatible to Lua 5.1 at the linker/dynamic +loader level. This means you can compile a C module against the +standard Lua headers and load the same shared library from either Lua +or LuaJIT. +</p> +<p> +LuaJIT extends the standard Lua VM with new functionality and adds +several extension modules. Please note this page is only about +<em>functional</em> enhancements and not about performance enhancements, +such as the optimized VM, the faster interpreter or the JIT compiler. +</p> + +<h2 id="modules">Extensions Modules</h2> +<p> +LuaJIT comes with several built-in extension modules: +</p> + +<h3 id="bit"><tt>bit.*</tt> — Bitwise operations</h3> +<p> +LuaJIT supports all bitwise operations as defined by +<a href="http://bitop.luajit.org"><span class="ext">»</span> Lua BitOp</a>: +</p> +<pre class="code"> +bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor +bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap +</pre> +<p> +This module is a LuaJIT built-in — you don't need to download or +install Lua BitOp. 
The Lua BitOp site has full documentation for all +<a href="http://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. +The FFI adds support for +<a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise operations</a>, +using the same API functions. +</p> +<p> +Please make sure to <tt>require</tt> the module before using any of +its functions: +</p> +<pre class="code"> +local bit = require("bit") +</pre> +<p> +An already installed Lua BitOp module is ignored by LuaJIT. +This way you can use bit operations from both Lua and LuaJIT on a +shared installation. +</p> + +<h3 id="ffi"><tt>ffi.*</tt> — FFI library</h3> +<p> +The <a href="ext_ffi.html">FFI library</a> allows calling external +C functions and the use of C data structures from pure Lua +code. +</p> + +<h3 id="jit"><tt>jit.*</tt> — JIT compiler control</h3> +<p> +The functions in this module +<a href="ext_jit.html">control the behavior of the JIT compiler engine</a>. +</p> + +<h3 id="c_api">C API extensions</h3> +<p> +LuaJIT adds some +<a href="ext_c_api.html">extra functions to the Lua/C API</a>. +</p> + +<h3 id="profiler">Profiler</h3> +<p> +LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>. +</p> + +<h2 id="library">Enhanced Standard Library Functions</h2> + +<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> +<p> +Unlike the standard implementation in Lua 5.1, <tt>xpcall()</tt> +passes any arguments after the error function to the function +which is called in a protected context. +</p> + +<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3> +<p> +Non-ASCII characters are handled transparently by the Lua source code parser. +This allows the use of UTF-8 characters in identifiers and strings. +A UTF-8 BOM is skipped at the start of the source code. +</p> + +<h3 id="tostring"><tt>tostring()</tt> etc. 
canonicalize NaN and ±Inf</h3> +<p> +All number-to-string conversions consistently convert non-finite numbers +to the same strings on all platforms. NaN results in <tt>"nan"</tt>, +positive infinity results in <tt>"inf"</tt> and negative infinity results +in <tt>"-inf"</tt>. +</p> + +<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> +<p> +All string-to-number conversions consistently convert integer and +floating-point inputs in decimal, hexadecimal and binary on all platforms. +<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous +problems with poor C library implementations. The builtin conversion +function provides full precision according to the IEEE-754 standard, it +works independently of the current locale and it supports hex floating-point +numbers (e.g. <tt>0x1.5p-3</tt>). +</p> + +<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3> +<p> +An extra argument has been added to <tt>string.dump()</tt>. If set to +<tt>true</tt>, 'stripped' bytecode without debug information is +generated. This speeds up later bytecode loading and reduces memory +usage. See also the +<a href="running.html#opt_b"><tt>-b</tt> command line option</a>. +</p> +<p> +The generated bytecode is portable and can be loaded on any architecture +that LuaJIT supports, independent of word size or endianess. However the +bytecode compatibility versions must match. Bytecode stays compatible +for dot releases (x.y.0 → x.y.1), but may change with major or +minor releases (2.0 → 2.1) or between any beta release. Foreign +bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. +</p> +<p> +Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies +a different, incompatible bytecode format for ports that use this mode (e.g. +ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified +in the future. 
+</p> + +<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> +<p> +An extra library function <tt>table.new()</tt> can be made available via +<tt>require("table.new")</tt>. This creates a pre-sized table, just like +the C API equivalent <tt>lua_createtable()</tt>. This is useful for big +tables if the final table size is known and automatic table resizing is +too expensive. +</p> + +<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3> +<p> +An extra library function <tt>table.clear()</tt> can be made available +via <tt>require("table.clear")</tt>. This clears all keys and values +from a table, but preserves the allocated array/hash sizes. This is +useful when a table, which is linked from multiple places, needs to be +cleared and/or when recycling a table for use by the same context. This +avoids managing backlinks, saves an allocation and the overhead of +incremental array/hash part growth. +</p> +<p> +Please note this function is meant for very specific situations. In most +cases it's better to replace the (usually single) link with a new table +and let the GC do its work. +</p> + +<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> +<p> +LuaJIT uses a Tausworthe PRNG with period 2^223 to implement +<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of +the PRNG results is much superior compared to the standard Lua +implementation which uses the platform-specific ANSI rand(). +</p> +<p> +The PRNG generates the same sequences from the same seeds on all +platforms and makes use of all bits in the seed argument. +<tt>math.random()</tt> without arguments generates 52 pseudo-random bits +for every call. The result is uniformly distributed between 0.0 and 1.0. +It's correctly scaled up and rounded for <tt>math.random(n [,m])</tt> to +preserve uniformity. 
+</p> + +<h3 id="io"><tt>io.*</tt> functions handle 64 bit file offsets</h3> +<p> +The file I/O functions in the standard <tt>io.*</tt> library handle +64 bit file offsets. In particular this means it's possible +to open files larger than 2 Gigabytes and to reposition or obtain +the current file position for offsets beyond 2 GB +(<tt>fp:seek()</tt> method). +</p> + +<h3 id="debug_meta"><tt>debug.*</tt> functions identify metamethods</h3> +<p> +<tt>debug.getinfo()</tt> and <tt>lua_getinfo()</tt> also return information +about invoked metamethods. The <tt>namewhat</tt> field is set to +<tt>"metamethod"</tt> and the <tt>name</tt> field has the name of +the corresponding metamethod (e.g. <tt>"__index"</tt>). +</p> + +<h2 id="resumable">Fully Resumable VM</h2> +<p> +The LuaJIT VM is fully resumable. This means you can yield from a +coroutine even across contexts, where this would not possible with +the standard Lua 5.1 VM: e.g. you can yield across <tt>pcall()</tt> +and <tt>xpcall()</tt>, across iterators and across metamethods. +</p> + +<h2 id="lua52">Extensions from Lua 5.2</h2> +<p> +LuaJIT supports some language and library extensions from Lua 5.2. +Features that are unlikely to break existing code are unconditionally +enabled: +</p> +<ul> +<li><tt>goto</tt> and <tt>::labels::</tt>.</li> +<li>Hex escapes <tt>'\x3F'</tt> and <tt>'\*'</tt> escape in strings.</li> +<li><tt>load(string|reader [, chunkname [,mode [,env]]])</tt>.</li> +<li><tt>loadstring()</tt> is an alias for <tt>load()</tt>.</li> +<li><tt>loadfile(filename [,mode [,env]])</tt>.</li> +<li><tt>math.log(x [,base])</tt>.</li> +<li><tt>string.rep(s, n [,sep])</tt>.</li> +<li><tt>string.format()</tt>: <tt>%q</tt> reversible. +<tt>%s</tt> checks <tt>__tostring</tt>. 
+<tt>%a</tt> and <tt>%A</tt> added.
<tt>rawlen()</tt> library function.</li> +<li><tt>pairs()</tt> and <tt>ipairs()</tt> check for <tt>__pairs</tt> and +<tt>__ipairs</tt>.</li> +<li><tt>coroutine.running()</tt> returns two results.</li> +<li><tt>table.pack()</tt> and <tt>table.unpack()</tt> +(same as <tt>unpack()</tt>).</li> +<li><tt>io.write()</tt> and <tt>file:write()</tt> return file handle +instead of <tt>true</tt>.</li> +<li><tt>os.execute()</tt> and <tt>pipe:close()</tt> return detailed +exit status.</li> +<li><tt>debug.setmetatable()</tt> returns object.</li> +<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> +<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> +<li><tt>package.searchers</tt>.</li> +<li><tt>module()</tt> returns the module table.</li> +</ul> +<p> +Note: this provides only partial compatibility with Lua 5.2 at the +language and Lua library level. LuaJIT is API+ABI-compatible with +Lua 5.1, which prevents implementing features that would otherwise +break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). +</p> + +<h2 id="lua53">Extensions from Lua 5.3</h2> +<p> +LuaJIT supports some extensions from Lua 5.3: +<ul> +<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> +<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> +<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> +<li><tt>assert()</tt> accepts any type of error object.</li> +<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> +<li><tt>coroutine.isyieldable()</tt>.</li> +<li>Lua/C API extensions: +<tt>lua_isyieldable()</tt> +</li> +</ul> + +<h2 id="exceptions">C++ Exception Interoperability</h2> +<p> +LuaJIT has built-in support for interoperating with C++ exceptions. 
+The available range of features depends on the target platform and +the toolchain used to compile LuaJIT: +</p> +<table class="exc"> +<tr class="exchead"> +<td class="excplatform">Platform</td> +<td class="exccompiler">Compiler</td> +<td class="excinterop">Interoperability</td> +</tr> +<tr class="odd separate"> +<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> +<td class="exccompiler">GCC 4.3+, Clang</td> +<td class="excinterop"><b style="color: #00a000;">Full</b></td> +</tr> +<tr class="even"> +<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td> +<td class="exccompiler">GCC, Clang</td> +<td class="excinterop"><b style="color: #00a000;">Full</b></td> +</tr> +<tr class="odd"> +<td class="excplatform">Other platforms, DWARF2 unwinding</td> +<td class="exccompiler">GCC, Clang</td> +<td class="excinterop"><b style="color: #c06000;">Limited</b></td> +</tr> +<tr class="even"> +<td class="excplatform">Windows/x64</td> +<td class="exccompiler">MSVC or WinSDK</td> +<td class="excinterop"><b style="color: #00a000;">Full</b></td> +</tr> +<tr class="odd"> +<td class="excplatform">Windows/x86</td> +<td class="exccompiler">Any</td> +<td class="excinterop"><b style="color: #00a000;">Full</b></td> +</tr> +<tr class="even"> +<td class="excplatform">Other platforms</td> +<td class="exccompiler">Other compilers</td> +<td class="excinterop"><b style="color: #a00000;">No</b></td> +</tr> +</table> +<p> +<b style="color: #00a000;">Full interoperability</b> means: +</p> +<ul> +<li>C++ exceptions can be caught on the Lua side with <tt>pcall()</tt>, +<tt>lua_pcall()</tt> etc.</li> +<li>C++ exceptions will be converted to the generic Lua error +<tt>"C++ exception"</tt>, unless you use the +<a href="ext_c_api.html#mode_wrapcfunc">C call wrapper</a> feature.</li> +<li>It's safe to throw C++ exceptions across non-protected Lua frames +on the C stack. 
The contents of the C++ exception object +pass through unmodified.</li> +<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>. +The corresponding Lua error message can be retrieved from the Lua stack.</li> +<li>Throwing Lua errors across C++ frames is safe. C++ destructors +will be called.</li> +</ul> +<p> +<b style="color: #c06000;">Limited interoperability</b> means: +</p> +<ul> +<li>C++ exceptions can be caught on the Lua side with <tt>pcall()</tt>, +<tt>lua_pcall()</tt> etc.</li> +<li>C++ exceptions will be converted to the generic Lua error +<tt>"C++ exception"</tt>, unless you use the +<a href="ext_c_api.html#mode_wrapcfunc">C call wrapper</a> feature.</li> +<li>C++ exceptions will be caught by non-protected Lua frames and +are rethrown as a generic Lua error. The C++ exception object will +be destroyed.</li> +<li>Lua errors <b>cannot</b> be caught on the C++ side.</li> +<li>Throwing Lua errors across C++ frames will <b>not</b> call +C++ destructors.</li> +</ul> + +<p> +<b style="color: #a00000;">No interoperability</b> means: +</p> +<ul> +<li>It's <b>not</b> safe to throw C++ exceptions across Lua frames.</li> +<li>C++ exceptions <b>cannot</b> be caught on the Lua side.</li> +<li>Lua errors <b>cannot</b> be caught on the C++ side.</li> +<li>Throwing Lua errors across C++ frames will <b>not</b> call +C++ destructors.</li> +</ul> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/faq.html b/lib/LuaJIT/doc/faq.html new file mode 100644 index 0000000..ad88c49 --- /dev/null +++ b/lib/LuaJIT/doc/faq.html @@ -0,0 +1,185 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Frequently Asked Questions (FAQ)</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" 
content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +dd { margin-left: 1.5em; } +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Frequently Asked Questions (FAQ)</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a class="current" href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<dl> +<dt>Q: Where can I learn more about LuaJIT and Lua?</dt> +<dd> +<ul style="padding: 0;"> +<li>The <a href="http://luajit.org/list.html"><span class="ext">»</span> LuaJIT mailing list</a> focuses on topics +related to LuaJIT.</li> +<li>The <a href="http://wiki.luajit.org/"><span class="ext">»</span> LuaJIT wiki</a> 
gathers community +resources about LuaJIT.</li> +<li>News about Lua itself can be found at the +<a href="http://www.lua.org/lua-l.html"><span class="ext">»</span> Lua mailing list</a>. +The mailing list archives are worth checking out for older postings +about LuaJIT.</li> +<li>The <a href="http://lua.org"><span class="ext">»</span> main Lua.org site</a> has complete +<a href="http://www.lua.org/docs.html"><span class="ext">»</span> documentation</a> of the language +and links to books and papers about Lua.</li> +<li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">»</span> Lua Wiki</a> +has information about diverse topics.</li> +</ul> +</dl> + +<dl> +<dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt> +<dd> +I'm planning to write more documentation about the internals of LuaJIT. +In the meantime, please use the following Google Scholar searches +to find relevant papers:<br> +Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">»</span> Trace Compiler</a><br> +Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">»</span> JIT Compiler</a><br> +Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">»</span> Dynamic Language Optimizations</a><br> +Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">»</span> SSA Form</a><br> +Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">»</span> Linear Scan Register Allocation</a><br> +Here is a list of the <a href="http://article.gmane.org/gmane.comp.lang.lua.general/58908"><span class="ext">»</span> innovative features in LuaJIT</a>.<br> +And, you know, reading the source is of course the only way to enlightenment. 
:-) +</dd> +</dl> + +<dl> +<dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br> +Q: My vararg functions fail after switching to LuaJIT!</dt> +<dd>LuaJIT is compatible with the Lua 5.1 language standard. It doesn't +support the implicit <tt>arg</tt> parameter for old-style vararg +functions from Lua 5.0.<br>Please convert your code to the +<a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">»</span> Lua 5.1 +vararg syntax</a>.</dd> +</dl> + +<dl> +<dt>Q: Why do I get this error: "bad FPU precision"?<br> +<dt>Q: I get weird behavior after initializing Direct3D.<br> +<dt>Q: Some FPU operations crash after I load a Delphi DLL.<br> +</dt> +<dd> + +DirectX/Direct3D (up to version 9) sets the x87 FPU to single-precision +mode by default. This violates the Windows ABI and interferes with the +operation of many programs — LuaJIT is affected, too. Please make +sure you always use the <tt>D3DCREATE_FPU_PRESERVE</tt> flag when +initializing Direct3D.<br> + +Direct3D version 10 or higher does not show this behavior anymore. +Consider testing your application with older versions, too.<br> + +Similarly, the Borland/Delphi runtime modifies the FPU control word and +enables FP exceptions. Of course this violates the Windows ABI, too. +Please check the Delphi docs for the Set8087CW method. + +</dl> + +<dl> +<dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt> +<dd>The interrupt signal handler sets a Lua debug hook. But this is +currently ignored by compiled code (this will eventually be fixed). If +your program is running in a tight loop and never falls back to the +interpreter, the debug hook never runs and can't throw the +"interrupted!" error.<br> In the meantime you have to press Ctrl-C +twice to stop your program. 
That's similar to when it's stuck +running inside a C function under the Lua interpreter.</dd> +</dl> + +<dl> +<dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt> +<dd>Because it's a completely redesigned VM and has very little code +in common with Lua anymore. Also, if the patch introduces changes to +the Lua semantics, these would need to be reflected everywhere in the +VM, from the interpreter up to all stages of the compiler.<br> Please +use only standard Lua language constructs. For many common needs you +can use source transformations or use wrapper or proxy functions. +The compiler will happily optimize away such indirections.</dd> +</dl> + +<dl> +<dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt> +<dd>Because it's a compiler — it needs to generate native +machine code. This means the code generator must be ported to each +architecture. And the fast interpreter is written in assembler and +must be ported, too. This is quite an undertaking.<br> +The <a href="install.html">install documentation</a> shows the supported +architectures. Other architectures will follow based on sufficient user +demand and/or sponsoring.</dd> +</dl> + +<dl> +<dt>Q: When will feature X be added? When will the next version be released?</dt> +<dd>When it's ready.<br> +C'mon, it's open source — I'm doing it on my own time and you're +getting it for free. You can either contribute a patch or sponsor +the development of certain features, if they are important to you. 
+</dd> +</dl> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/img/contact.png b/lib/LuaJIT/doc/img/contact.png Binary files differnew file mode 100644 index 0000000..9c73dc5 --- /dev/null +++ b/lib/LuaJIT/doc/img/contact.png diff --git a/lib/LuaJIT/doc/install.html b/lib/LuaJIT/doc/install.html new file mode 100644 index 0000000..8085573 --- /dev/null +++ b/lib/LuaJIT/doc/install.html @@ -0,0 +1,691 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Installation</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +table.compat { + line-height: 1.2; + font-size: 80%; +} +table.compat td { + border: 1px solid #bfcfff; + height: 2.5em; +} +table.compat tr.compathead td { + font-weight: bold; + border-bottom: 2px solid #bfcfff; +} +tr.compathead td.compatos { + vertical-align: top; +} +table.compat td.compatcpu { + width: 18%; + border-right: 2px solid #bfcfff; +} +td.compatos { + width: 21%; + vertical-align: middle; +} +td.compatno { + background-color: #d0d0d0; +} +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Installation</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a class="current" href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a 
href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT is only distributed as a source package. This page explains +how to build and install LuaJIT with different operating systems +and C compilers. +</p> +<p> +For the impatient (on POSIX systems): +</p> +<pre class="code"> +make && sudo make install +</pre> +<p> +LuaJIT currently builds out-of-the-box on most systems. 
+Here's the compatibility matrix for the supported combinations of +operating systems, CPUs and compilers: +</p> +<table class="compat"> +<tr class="compathead"> +<td class="compatcpu">CPU / OS</td> +<td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td> +<td class="compatos"><a href="#posix">*BSD, Other</a></td> +<td class="compatos"><a href="#posix">OSX 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td> +<td class="compatos"><a href="#windows">Windows<br>XP/Vista/7</a></td> +</tr> +<tr class="odd separate"> +<td class="compatcpu">x86 (32 bit)</td> +<td class="compatos">GCC 4.2+</td> +<td class="compatos">GCC 4.2+</td> +<td class="compatos">XCode 5.0+<br>Clang</td> +<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW, Cygwin</td> +</tr> +<tr class="even"> +<td class="compatcpu">x64 (64 bit)</td> +<td class="compatos">GCC 4.2+</td> +<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td> +<td class="compatos">XCode 5.0+<br>Clang</td> +<td class="compatos">MSVC + SDK v7.0<br>WinSDK v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td> +</tr> +<tr class="odd"> +<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> +<td class="compatos">GCC 4.2+</td> +<td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td> +<td class="compatos">XCode 5.0+<br>Clang</td> +<td class="compatos compatno"> </td> +</tr> +<tr class="even"> +<td class="compatcpu"><a href="#cross2">ARM64</a></td> +<td class="compatos">GCC 4.8+</td> +<td class="compatos compatno"> </td> +<td class="compatos">XCode 6.0+<br>Clang 3.5+</td> +<td class="compatos compatno"> </td> +</tr> +<tr class="odd"> +<td class="compatcpu"><a href="#cross2">PPC</a></td> +<td class="compatos">GCC 4.3+</td> +<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td> +<td class="compatos compatno"> </td> +<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td> +</tr> +<tr class="even"> +<td class="compatcpu"><a 
href="#cross2">MIPS32<br>MIPS64</a></td> +<td class="compatos">GCC 4.3+</td> +<td class="compatos">GCC 4.3+</td> +<td class="compatos compatno"> </td> +<td class="compatos compatno"> </td> +</tr> +</table> + +<h2>Configuring LuaJIT</h2> +<p> +The standard configuration should work fine for most installations. +Usually there is no need to tweak the settings. The following files +hold all user-configurable settings: +</p> +<ul> +<li><tt>src/luaconf.h</tt> sets some configuration variables.</li> +<li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX +only).</li> +<li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT +under POSIX, MinGW or Cygwin.</li> +<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with +MSVC or WinSDK.</li> +</ul> +<p> +Please read the instructions given in these files, before changing +any settings. +</p> +<p> +LuaJIT on x64 currently uses 32 bit GC objects by default. +<tt>LJ_GC64</tt> mode may be explicitly enabled: +add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run +<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note +about the <a href="extensions.html#string_dump">bytecode format</a> +differences, too. +</p> + +<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> +<h3>Prerequisites</h3> +<p> +Depending on your distribution, you may need to install a package for +GCC, the development headers and/or a complete SDK. E.g. on a current +Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager. +</p> +<p> +Download the current source package of LuaJIT (pick the .tar.gz), +if you haven't already done so. Move it to a directory of your choice, +open a terminal window and change to this directory. 
Now unpack the archive +and change to the newly created directory: +</p> +<pre class="code"> +tar zxf LuaJIT-2.0.5.tar.gz +cd LuaJIT-2.0.5</pre> +<h3>Building LuaJIT</h3> +<p> +The supplied Makefiles try to auto-detect the settings needed for your +operating system and your compiler. They need to be run with GNU Make, +which is probably the default on your system, anyway. Simply run: +</p> +<pre class="code"> +make +</pre> +<p> +This always builds a native binary, depending on the host OS +you're running this command on. Check the section on +<a href="#cross">cross-compilation</a> for more options. +</p> +<p> +By default, modules are only searched under the prefix <tt>/usr/local</tt>. +You can add an extra prefix to the search paths by appending the +<tt>PREFIX</tt> option, e.g.: +</p> +<pre class="code"> +make PREFIX=/home/myself/lj2 +</pre> +<p> +Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment +variable is not set, then it's forced to <tt>10.4</tt>. +</p> +<h3>Installing LuaJIT</h3> +<p> +The top-level Makefile installs LuaJIT by default under +<tt>/usr/local</tt>, i.e. the executable ends up in +<tt>/usr/local/bin</tt> and so on. You need root privileges +to write to this path. So, assuming sudo is installed on your system, +run the following command and enter your sudo password: +</p> +<pre class="code"> +sudo make install +</pre> +<p> +Otherwise specify the directory prefix as an absolute path, e.g.: +</p> +<pre class="code"> +make install PREFIX=/home/myself/lj2 +</pre> +<p> +Obviously the prefixes given during build and installation need to be the same. +</p> + +<h2 id="windows">Windows Systems</h2> +<h3>Prerequisites</h3> +<p> +Either install one of the open source SDKs +(<a href="http://mingw.org/"><span class="ext">»</span> MinGW</a> or +<a href="http://www.cygwin.com/"><span class="ext">»</span> Cygwin</a>), which come with a modified +GCC plus the required development headers. +</p> +<p> +Or install Microsoft's Visual C++ (MSVC). 
The freely downloadable +<a href="http://www.microsoft.com/Express/VC/"><span class="ext">»</span> Express Edition</a> +works just fine, but only contains an x86 compiler. +</p> +<p> +The freely downloadable +<a href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx"><span class="ext">»</span> Windows SDK</a> +only comes with command line tools, but this is all you need to build LuaJIT. +It contains x86 and x64 compilers. +</p> +<p> +Next, download the source package and unpack it using an archive manager +(e.g. the Windows Explorer) to a directory of your choice. +</p> +<h3>Building with MSVC</h3> +<p> +Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the +directory where you've unpacked the sources and run these commands: +</p> +<pre class="code"> +cd src +msvcbuild +</pre> +<p> +Then follow the installation instructions below. +</p> +<h3>Building with the Windows SDK</h3> +<p> +Open a "Windows SDK Command Shell" and select the x86 compiler: +</p> +<pre class="code"> +setenv /release /x86 +</pre> +<p> +Or select the x64 compiler: +</p> +<pre class="code"> +setenv /release /x64 +</pre> +<p> +Then <tt>cd</tt> to the directory where you've unpacked the sources +and run these commands: +</p> +<pre class="code"> +cd src +msvcbuild +</pre> +<p> +Then follow the installation instructions below. +</p> +<h3>Building with MinGW or Cygwin</h3> +<p> +Open a command prompt window and make sure the MinGW or Cygwin programs +are in your path. Then <tt>cd</tt> to the directory where +you've unpacked the sources and run this command for MinGW: +</p> +<pre class="code"> +mingw32-make +</pre> +<p> +Or this command for Cygwin: +</p> +<pre class="code"> +make +</pre> +<p> +Then follow the installation instructions below. +</p> +<h3>Installing LuaJIT</h3> +<p> +Copy <tt>luajit.exe</tt> and <tt>lua51.dll</tt> (built in the <tt>src</tt> +directory) to a newly created directory (any location is ok). 
+Add <tt>lua</tt> and <tt>lua\jit</tt> directories below it and copy +all Lua files from the <tt>src\jit</tt> directory of the distribution +to the latter directory. +</p> +<p> +There are no hardcoded +absolute path names — all modules are loaded relative to the +directory where <tt>luajit.exe</tt> is installed +(see <tt>src/luaconf.h</tt>). +</p> + +<h2 id="cross">Cross-compiling LuaJIT</h2> +<p> +First, let's clear up some terminology: +</p> +<ul> +<li>Host: This is your development system, usually based on a x64 or x86 CPU.</li> +<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li> +<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li> +<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li> +<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li> +</ul> +<p> +The GNU Makefile-based build system allows cross-compiling on any host +for any supported target: +</p> +<ul> +<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> +<li>Both host and target architectures must have the same pointer size.</li> +<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> +<li>64 bit targets always require compilation on a 64 bit host.</li> +</ul> +<p> +You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the +target OS differ, or you'll get assembler or linker errors: +</p> +<ul> +<li>E.g. 
if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> +<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li> +<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li> +</ul> +<p> +Here are some examples where host and target have the same CPU: +</p> +<pre class="code"> +# Cross-compile to a 32 bit binary on a multilib x64 OS +make CC="gcc -m32" + +# Cross-compile on Debian/Ubuntu for Windows (mingw32 package) +make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows +</pre> +<p id="cross2"> +The <tt>CROSS</tt> prefix allows specifying a standard GNU cross-compile +toolchain (Binutils, GCC and a matching libc). The prefix may vary +depending on the <tt>--target</tt> the toolchain was built for (note the +<tt>CROSS</tt> prefix has a trailing <tt>"-"</tt>). The examples below +use the canonical toolchain triplets for Linux. +</p> +<p> +Since there's often no easy way to detect CPU features at runtime, it's +important to compile with the proper CPU or architecture settings: +</p> +<ul> +<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li> +<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li> +<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. 
Otherwise LuaJIT may not run at the full performance of your target CPU.</li> +<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li> +</ul> +<p> +Here are some examples for targets with a different CPU than the host: +</p> +<pre class="code"> +# ARM soft-float +make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ + TARGET_CFLAGS="-mfloat-abi=soft" + +# ARM soft-float ABI with VFP (example for Cortex-A9) +make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ + TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp" + +# ARM hard-float ABI with VFP (armhf, most modern toolchains) +make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- + +# ARM64 +make CROSS=aarch64-linux- + +# PPC +make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- + +# MIPS32 big-endian +make HOST_CC="gcc -m32" CROSS=mips-linux- +# MIPS32 little-endian +make HOST_CC="gcc -m32" CROSS=mipsel-linux- + +# MIPS64 big-endian +make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" +# MIPS64 little-endian +make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" +</pre> +<p> +You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/index.html">Android NDK</a>. +The environment variables need to match the install locations and the +desired target platform. E.g. Android 4.0 corresponds to ABI level 14. +For details check the folder <tt>docs</tt> in the NDK directory. +</p> +<p> +Only a few common variations for the different CPUs, ABIs and platforms +are listed. 
Please use your own judgement for which combination you want +to build/deploy or which lowest common denominator you want to pick: +</p> +<pre class="code"> +# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo) +NDK=/opt/android/ndk +NDKABI=8 +NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 +NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- +NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" +make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" + +# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS) +NDK=/opt/android/ndk +NDKABI=14 +NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9 +NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi- +NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm" +NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8" +make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH" + +# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS) +NDK=/opt/android/ndk +NDKABI=14 +NDKVER=$NDK/toolchains/mipsel-linux-android-4.9 +NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android- +NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips" +make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" + +# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS) +NDK=/opt/android/ndk +NDKABI=14 +NDKVER=$NDK/toolchains/x86-4.9 +NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android- +NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86" +make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF" +</pre> +<p> +You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/devcenter/ios/index.action"><span class="ext">»</span> iOS SDK</a>: +</p> +<p style="font-size: 8pt;"> +Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps +are not allowed to generate code at runtime. You'll only get the performance +of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but +much slower than the JIT compiler. 
Please complain to Apple, not me. +Or use Android. :-p +</p> +<pre class="code"> +# iOS/ARM (32 bit) +ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) +ICC=$(xcrun --sdk iphoneos --find clang) +ISDKF="-arch armv7 -isysroot $ISDKP" +make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \ + CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS + +# iOS/ARM64 +ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) +ICC=$(xcrun --sdk iphoneos --find clang) +ISDKF="-arch arm64 -isysroot $ISDKP" +make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \ + TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +</pre> + +<h3 id="consoles">Cross-compiling for consoles</h3> +<p> +Building LuaJIT for consoles requires both a supported host compiler +(x86 or x64) and a cross-compiler (to PPC or ARM) from the official +console SDK. +</p> +<p> +Due to restrictions on consoles, the JIT compiler is disabled and only +the fast interpreter is built. This is still faster than plain Lua, +but much slower than the JIT compiler. The FFI is disabled, too, since +it's not very useful in such an environment. +</p> +<p> +The following commands build a static library <tt>libluajit.a</tt>, +which can be linked against your game, just like the Lua library. +</p> +<p> +To cross-compile for <b id="ps3">PS3</b> from a Linux host (requires +32 bit GCC, i.e. 
multilib Linux/x64) or a Windows host (requires +32 bit MinGW), run this command: +</p> +<pre class="code"> +make HOST_CC="gcc -m32" CROSS=ppu-lv2- +</pre> +<p> +To cross-compile for <b id="ps4">PS4</b> from a Windows host, +open a "Visual Studio .NET Command Prompt" (64 bit host compiler), +<tt>cd</tt> to the directory where you've unpacked the sources and +run the following commands: +</p> +<pre class="code"> +cd src +ps4build +</pre> +<p> +To cross-compile for <b id="psvita">PS Vita</b> from a Windows host, +open a "Visual Studio .NET Command Prompt" (32 bit host compiler), +<tt>cd</tt> to the directory where you've unpacked the sources and +run the following commands: +</p> +<pre class="code"> +cd src +psvitabuild +</pre> +<p> +To cross-compile for <b id="xbox360">Xbox 360</b> from a Windows host, +open a "Visual Studio .NET Command Prompt" (32 bit host compiler), +<tt>cd</tt> to the directory where you've unpacked the sources and run +the following commands: +</p> +<pre class="code"> +cd src +xedkbuild +</pre> +<p> +To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host, +open a "Visual Studio .NET Command Prompt" (64 bit host compiler), +<tt>cd</tt> to the directory where you've unpacked the sources and run +the following commands: +</p> +<pre class="code"> +cd src +xb1build +</pre> + +<h2 id="embed">Embedding LuaJIT</h2> +<p> +LuaJIT is API-compatible with Lua 5.1. If you've already embedded Lua +into your application, you probably don't need to do anything to switch +to LuaJIT, except link with a different library: +</p> +<ul> +<li>It's strongly suggested to build LuaJIT separately using the supplied +build system. Please do <em>not</em> attempt to integrate the individual +source files into your build tree. You'll most likely get the internal build +dependencies wrong or mess up the compiler flags. 
Treat LuaJIT like any +other external library and link your application with either the dynamic +or static library, depending on your needs.</li> +<li>If you want to load C modules compiled for plain Lua +with <tt>require()</tt>, you need to make sure the public symbols +(e.g. <tt>lua_pushnumber</tt>) are exported, too: +<ul><li>On POSIX systems you can either link to the shared library +or link the static library into your application. In the latter case +you'll need to export all public symbols from your main executable +(e.g. <tt>-Wl,-E</tt> on Linux) and add the external dependencies +(e.g. <tt>-lm -ldl</tt> on Linux).</li> +<li>Since Windows symbols are bound to a specific DLL name, you need to +link to the <tt>lua51.dll</tt> created by the LuaJIT build (do not rename +the DLL). You may link LuaJIT statically on Windows only if you don't +intend to load Lua/C modules at runtime. +</li></ul> +</li> +<li> +If you're building a 64 bit application on OSX which links directly or +indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode, +you need to link your main executable with these flags: +<pre class="code"> +-pagezero_size 10000 -image_base 100000000 +</pre> +</li> +</ul> +<p>Additional hints for initializing LuaJIT using the C API functions:</p> +<ul> +<li>Here's a +<a href="http://lua-users.org/wiki/SimpleLuaApiExample"><span class="ext">»</span> simple example</a> +for embedding Lua or LuaJIT into your application.</li> +<li>Make sure you use <tt>luaL_newstate</tt>. Avoid using +<tt>lua_newstate</tt>, since this uses the (slower) default memory +allocator from your system (no support for this on x64).</li> +<li>Make sure you use <tt>luaL_openlibs</tt> and not the old Lua 5.0 style +of calling <tt>luaopen_base</tt> etc. directly.</li> +<li>To change or extend the list of standard libraries to load, copy +<tt>src/lib_init.c</tt> to your project and modify it accordingly. 
+Make sure the <tt>jit</tt> library is loaded or the JIT compiler +will not be activated.</li> +<li>The <tt>bit.*</tt> module for bitwise operations +is already built-in. There's no need to statically link +<a href="http://bitop.luajit.org/"><span class="ext">»</span> Lua BitOp</a> to your application.</li> +</ul> + +<h2 id="distro">Hints for Distribution Maintainers</h2> +<p> +The LuaJIT build system has extra provisions for the needs of most +POSIX-based distributions. If you're a package maintainer for +a distribution, <em>please</em> make use of these features and +avoid patching, subverting, autotoolizing or messing up the build system +in unspeakable ways. +</p> +<p> +There should be absolutely no need to patch <tt>luaconf.h</tt> or any +of the Makefiles. And please do not hand-pick files for your packages — +simply use whatever <tt>make install</tt> creates. There's a reason +for all of the files <em>and</em> directories it creates. +</p> +<p> +The build system uses GNU make and auto-detects most settings based on +the host you're building it on. This should work fine for native builds, +even when sandboxed. You may need to pass some of the following flags to +<em>both</em> the <tt>make</tt> and the <tt>make install</tt> command lines +for a regular distribution build: +</p> +<ul> +<li><tt>PREFIX</tt> overrides the installation path and should usually +be set to <tt>/usr</tt>. Setting this also changes the module paths and +the paths needed to locate the shared library.</li> +<li><tt>DESTDIR</tt> is an absolute path which allows you to install +to a shadow tree instead of the root tree of the build system.</li> +<li><tt>MULTILIB</tt> sets the architecture-specific library path component +for multilib systems. The default is <tt>lib</tt>.</li> +<li>Have a look at the top-level <tt>Makefile</tt> and <tt>src/Makefile</tt> +for additional variables to tweak. 
The following variables <em>may</em> be +overridden, but it's <em>not</em> recommended, except for special needs +like cross-builds: +<tt>BUILDMODE, CC, HOST_CC, STATIC_CC, DYNAMIC_CC, CFLAGS, HOST_CFLAGS, +TARGET_CFLAGS, LDFLAGS, HOST_LDFLAGS, TARGET_LDFLAGS, TARGET_SHLDFLAGS, +TARGET_FLAGS, LIBS, HOST_LIBS, TARGET_LIBS, CROSS, HOST_SYS, TARGET_SYS +</tt></li> +</ul> +<p> +The build system has a special target for an amalgamated build, i.e. +<tt>make amalg</tt>. This compiles the LuaJIT core as one huge C file +and allows GCC to generate faster and shorter code. Alas, this requires +lots of memory during the build. This may be a problem for some users, +that's why it's not enabled by default. But it shouldn't be a problem for +most build farms. It's recommended that binary distributions use this +target for their LuaJIT builds. +</p> +<p> +The tl;dr version of the above: +</p> +<pre class="code"> +make amalg PREFIX=/usr && \ +make install PREFIX=/usr DESTDIR=/tmp/buildroot +</pre> +<p> +Finally, if you encounter any difficulties, please +<a href="contact.html">contact me</a> first, instead of releasing a broken +package onto unsuspecting users. Because they'll usually complain +to me (the upstream) and not you (the package maintainer), anyway. 
+</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/luajit.html b/lib/LuaJIT/doc/luajit.html new file mode 100644 index 0000000..a85c201 --- /dev/null +++ b/lib/LuaJIT/doc/luajit.html @@ -0,0 +1,235 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>LuaJIT</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<meta name="description" content="LuaJIT is a Just-In-Time (JIT) compiler for the Lua language."> +<style type="text/css"> +table.feature { + width: inherit; + line-height: 1.2; + margin: 0; +} +table.feature td { + width: 80px; + height: 40px; + vertical-align: middle; + text-align: center; + font-weight: bold; + border: 4px solid #e6ecff; + border-radius: 12px; +} +table.os td { + background: #7080d0; + background-image: linear-gradient(#4060c0 10%, #b0b0ff 95%); + background-image: -moz-linear-gradient(#4060c0 10%, #b0b0ff 95%); + background-image: -webkit-linear-gradient(#4060c0 10%, #b0b0ff 95%); + background-image: -o-linear-gradient(#4060c0 10%, #b0b0ff 95%); + background-image: -ms-linear-gradient(#4060c0 10%, #b0b0ff 95%); +} +table.os1 td { + color: #ffff80; +} +table.os2 td { + color: #ffa040; +} +table.os3 td { + color: #40ffff; +} +table.compiler td { + color: #2080ff; + background: #62bf41; + background-image: linear-gradient(#62bf41 10%, #b0ffb0 95%); + background-image: -moz-linear-gradient(#62bf41 10%, #b0ffb0 95%); + background-image: -webkit-linear-gradient(#62bf41 10%, #b0ffb0 95%); + background-image: -o-linear-gradient(#62bf41 10%, 
#b0ffb0 95%); + background-image: -ms-linear-gradient(#62bf41 10%, #b0ffb0 95%); +} +table.cpu td { + color: #ffff00; + background: #cf7251; + background-image: linear-gradient(#bf6241 10%, #ffb0b0 95%); + background-image: -moz-linear-gradient(#bf6241 10%, #ffb0b0 95%); + background-image: -webkit-linear-gradient(#bf6241 10%, #ffb0b0 95%); + background-image: -o-linear-gradient(#bf6241 10%, #ffb0b0 95%); + background-image: -ms-linear-gradient(#bf6241 10%, #ffb0b0 95%); +} +table.fcompat td { + color: #2060e0; + background: #61cfcf; + background-image: linear-gradient(#41bfbf 10%, #b0ffff 95%); + background-image: -moz-linear-gradient(#41bfbf 10%, #b0ffff 95%); + background-image: -webkit-linear-gradient(#41bfbf 10%, #b0ffff 95%); + background-image: -o-linear-gradient(#41bfbf 10%, #b0ffff 95%); + background-image: -ms-linear-gradient(#41bfbf 10%, #b0ffff 95%); +} +table.stats td { + color: #ffffff; + background: #a0a0a0; + background-image: linear-gradient(#808080 10%, #d0d0d0 95%); + background-image: -moz-linear-gradient(#808080 10%, #d0d0d0 95%); + background-image: -webkit-linear-gradient(#808080 10%, #d0d0d0 95%); + background-image: -o-linear-gradient(#808080 10%, #d0d0d0 95%); + background-image: -ms-linear-gradient(#808080 10%, #d0d0d0 95%); +} +table.stats td.speed { + color: #ff4020; +} +table.stats td.kb { + color: #ffff80; + background: #808080; + background-image: linear-gradient(#606060 10%, #c0c0c0 95%); + background-image: -moz-linear-gradient(#606060 10%, #c0c0c0 95%); + background-image: -webkit-linear-gradient(#606060 10%, #c0c0c0 95%); + background-image: -o-linear-gradient(#606060 10%, #c0c0c0 95%); + background-image: -ms-linear-gradient(#606060 10%, #c0c0c0 95%); +} +table.feature small { + font-size: 50%; +} +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>LuaJIT</h1> +</div> +<div id="nav"> +<ul><li> +<a class="current" 
href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT is a <b>Just-In-Time Compiler</b> (JIT) for the +<a href="http://www.lua.org/"><span class="ext">»</span> Lua</a> programming language. +Lua is a powerful, dynamic and light-weight programming language. +It may be embedded or used as a general-purpose, stand-alone language. +</p> +<p> +LuaJIT is Copyright © 2005-2018 Mike Pall, released under the +<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">»</span> MIT open source license</a>. 
+</p> +<p> +</p> + +<h2>Compatibility</h2> +<table class="feature os os1"> +<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr> +</table> +<table class="feature os os2"> +<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> +</table> +<table class="feature os os3"> +<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr> +</table> +<table class="feature compiler"> +<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr> +</table> +<table class="feature cpu"> +<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr> +</table> +<table class="feature fcompat"> +<tr><td>Lua 5.1<br>API+ABI</td><td>+ JIT</td><td>+ BitOp</td><td>+ FFI</td><td>Drop-in<br>DLL/.so</td></tr> +</table> + +<h2>Overview</h2> +<table class="feature stats"> +<tr> +<td class="speed">3x<br>- 100x</td> +<td class="kb">115 <small>KB</small><br>VM</td> +<td class="kb">90 <small>KB</small><br>JIT</td> +<td class="kloc">63 <small>KLOC</small><br>C</td> +<td class="kloc">24 <small>KLOC</small><br>ASM</td> +<td class="kloc">11 <small>KLOC</small><br>Lua</td> +</tr> +</table> +<p style="margin-top: 1em;"> +LuaJIT has been successfully used as a <b>scripting middleware</b> in +games, appliances, network and graphics apps, numerical simulations, +trading platforms and many other specialty applications. It scales from +embedded devices, smartphones, desktops up to server farms. It combines +high flexibility with <a href="http://luajit.org/performance.html"><span class="ext">»</span> high performance</a> +and an unmatched <b>low memory footprint</b>. +</p> +<p> +LuaJIT has been in continuous development since 2005. It's widely +considered to be <b>one of the fastest dynamic language +implementations</b>. It has outperformed other dynamic languages on many +cross-language benchmarks since its first release — often by a +substantial margin. 
+</p> +<p> +For <b>LuaJIT 2.0</b>, the whole VM has been rewritten from the ground up +and relentlessly optimized for performance. It combines a <b>high-speed +interpreter</b>, written in assembler, with a <b>state-of-the-art JIT +compiler</b>. +</p> +<p> +An innovative <b>trace compiler</b> is integrated with advanced, +SSA-based optimizations and highly tuned code generation backends. +A substantial reduction of the overhead associated with dynamic languages +allows it to break into the performance range traditionally reserved for +offline, static language compilers. +</p> + +<h2>More ...</h2> +<p> +Please select a sub-topic in the navigation bar to learn more about LuaJIT. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/running.html b/lib/LuaJIT/doc/running.html new file mode 100644 index 0000000..ae3cd71 --- /dev/null +++ b/lib/LuaJIT/doc/running.html @@ -0,0 +1,308 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Running LuaJIT</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +table.opt { + line-height: 1.2; +} +tr.opthead td { + font-weight: bold; +} +td.flag_name { + width: 4em; +} +td.flag_level { + width: 2em; + text-align: center; +} +td.param_name { + width: 6em; +} +td.param_default { + width: 4em; + text-align: right; +} +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Running LuaJIT</h1> +</div> +<div id="nav"> 
+<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a class="current" href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on +POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple +Lua statements or whole Lua applications from the command line. It has an +interactive mode, too. +</p> + +<h2 id="options">Command Line Options</h2> +<p> +The <tt>luajit</tt> stand-alone executable is just a slightly modified +version of the regular <tt>lua</tt> stand-alone executable. +It supports the same basic options, too. <tt>luajit -h</tt> +prints a short list of the available options. Please have a look at the +<a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">»</span> Lua manual</a> +for details. 
+</p> +<p> +LuaJIT has some additional options: +</p> + +<h3 id="opt_b"><tt>-b[options] input output</tt></h3> +<p> +This option saves or lists bytecode. The following additional options +are accepted: +</p> +<ul> +<li><tt>-l</tt> — Only list bytecode.</li> +<li><tt>-s</tt> — Strip debug info (this is the default).</li> +<li><tt>-g</tt> — Keep debug info.</li> +<li><tt>-n name</tt> — Set module name (default: auto-detect from input name).</li> +<li><tt>-t type</tt> — Set output file type (default: auto-detect from output name).</li> +<li><tt>-a arch</tt> — Override architecture for object files (default: native).</li> +<li><tt>-o os</tt> — Override OS for object files (default: native).</li> +<li><tt>-e chunk</tt> — Use chunk string as input.</li> +<li><tt>-</tt> (a single minus sign) — Use stdin as input and/or stdout as output.</li> +</ul> +<p> +The output file type is auto-detected from the extension of the output +file name: +</p> +<ul> +<li><tt>c</tt> — C source file, exported bytecode data.</li> +<li><tt>h</tt> — C header file, static bytecode data.</li> +<li><tt>obj</tt> or <tt>o</tt> — Object file, exported bytecode data +(OS- and architecture-specific).</li> +<li><tt>raw</tt> or any other extension — Raw bytecode file (portable).</li> +</ul> +<p> +Notes: +</p> +<ul> +<li>See also <a href="extensions.html#string_dump">string.dump()</a> +for information on bytecode portability and compatibility.</li> +<li>A file in raw bytecode format is auto-detected and can be loaded like +any Lua source file. E.g. directly from the command line or with +<tt>loadfile()</tt>, <tt>dofile()</tt> etc.</li> +<li>To statically embed the bytecode of a module in your application, +generate an object file and just link it with your application.</li> +<li>On most ELF-based systems (e.g. Linux) you need to explicitly export the +global symbols when linking your application, e.g. 
with: <tt>-Wl,-E</tt></li> +<li><tt>require()</tt> tries to load embedded bytecode data from exported +symbols (in <tt>*.exe</tt> or <tt>lua51.dll</tt> on Windows) and from +shared libraries in <tt>package.cpath</tt>.</li> +</ul> +<p> +Typical usage examples: +</p> +<pre class="code"> +luajit -b test.lua test.out # Save bytecode to test.out +luajit -bg test.lua test.out # Keep debug info +luajit -be "print('hello world')" test.out # Save cmdline script + +luajit -bl test.lua # List to stdout +luajit -bl test.lua test.txt # List to test.txt +luajit -ble "print('hello world')" # List cmdline script + +luajit -b test.lua test.obj # Generate object file +# Link test.obj with your application and load it with require("test") +</pre> + +<h3 id="opt_j"><tt>-j cmd[=arg[,arg...]]</tt></h3> +<p> +This option performs a LuaJIT control command or activates one of the +loadable extension modules. The command is first looked up in the +<tt>jit.*</tt> library. If no matching function is found, a module +named <tt>jit.&lt;cmd&gt;</tt> is loaded and the <tt>start()</tt> +function of the module is called with the specified arguments (if +any). The space between <tt>-j</tt> and <tt>cmd</tt> is optional. +</p> +<p> +Here are the available LuaJIT control commands: +</p> +<ul> +<li id="j_on"><tt>-jon</tt> — Turns the JIT compiler on (default).</li> +<li id="j_off"><tt>-joff</tt> — Turns the JIT compiler off (only use the interpreter).</li> +<li id="j_flush"><tt>-jflush</tt> — Flushes the whole cache of compiled code.</li> +<li id="j_v"><tt>-jv</tt> — Shows verbose information about the progress of the JIT compiler.</li> +<li id="j_dump"><tt>-jdump</tt> — Dumps the code and structures used in various compiler stages.</li> +<li id="j_p"><tt>-jp</tt> — Starts the <a href="ext_profiler.html">integrated profiler</a>.</li> +</ul> +<p> +The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules +written in Lua. They are mainly used for debugging the JIT compiler +itself. 
For a description of their options and output format, please +read the comment block at the start of their source. +They can be found in the <tt>lib</tt> directory of the source +distribution or installed under the <tt>jit</tt> directory. By default +this is <tt>/usr/local/share/luajit-2.0.5/jit</tt> on POSIX +systems. +</p> + +<h3 id="opt_O"><tt>-O[level]</tt><br> +<tt>-O[+]flag</tt> <tt>-O-flag</tt><br> +<tt>-Oparam=value</tt></h3> +<p> +This option allows fine-tuned control of the optimizations used by +the JIT compiler. This is mainly intended for debugging LuaJIT itself. +Please note that the JIT compiler is extremely fast (we are talking +about the microsecond to millisecond range). Disabling optimizations +doesn't have any visible impact on its overhead, but usually generates +code that runs slower. +</p> +<p> +The first form sets an optimization level — this enables a +specific mix of optimization flags. <tt>-O0</tt> turns off all +optimizations and higher numbers enable more optimizations. Omitting +the level (i.e. just <tt>-O</tt>) sets the default optimization level, +which is <tt>-O3</tt> in the current version. +</p> +<p> +The second form adds or removes individual optimization flags. +The third form sets a parameter for the VM or the JIT compiler +to a specific value. +</p> +<p> +You can either use this option multiple times (like <tt>-Ocse +-O-dce -Ohotloop=10</tt>) or separate several settings with a comma +(like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from +left to right and later settings override earlier ones. You can freely +mix the three forms, but note that setting an optimization level +overrides all earlier flags. 
+</p> +<p> +Here are the available flags and at what optimization levels they +are enabled: +</p> +<table class="opt"> +<tr class="opthead"> +<td class="flag_name">Flag</td> +<td class="flag_level">-O1</td> +<td class="flag_level">-O2</td> +<td class="flag_level">-O3</td> +<td class="flag_desc"> </td> +</tr> +<tr class="odd separate"> +<td class="flag_name">fold</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Constant Folding, Simplifications and Reassociation</td></tr> +<tr class="even"> +<td class="flag_name">cse</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Common-Subexpression Elimination</td></tr> +<tr class="odd"> +<td class="flag_name">dce</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Dead-Code Elimination</td></tr> +<tr class="even"> +<td class="flag_name">narrow</td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Narrowing of numbers to integers</td></tr> +<tr class="odd"> +<td class="flag_name">loop</td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_level">•</td><td class="flag_desc">Loop Optimizations (code hoisting)</td></tr> +<tr class="even"> +<td class="flag_name">fwd</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Load Forwarding (L2L) and Store Forwarding (S2L)</td></tr> +<tr class="odd"> +<td class="flag_name">dse</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Dead-Store Elimination</td></tr> +<tr class="even"> +<td class="flag_name">abc</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Array Bounds Check Elimination</td></tr> +<tr class="odd"> +<td 
class="flag_name">sink</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Allocation/Store Sinking</td></tr> +<tr class="even"> +<td class="flag_name">fuse</td><td class="flag_level"> </td><td class="flag_level"> </td><td class="flag_level">•</td><td class="flag_desc">Fusion of operands into instructions</td></tr> +</table> +<p> +Here are the parameters and their default settings: +</p> +<table class="opt"> +<tr class="opthead"> +<td class="param_name">Parameter</td> +<td class="param_default">Default</td> +<td class="param_desc"> </td> +</tr> +<tr class="odd separate"> +<td class="param_name">maxtrace</td><td class="param_default">1000</td><td class="param_desc">Max. number of traces in the cache</td></tr> +<tr class="even"> +<td class="param_name">maxrecord</td><td class="param_default">4000</td><td class="param_desc">Max. number of recorded IR instructions</td></tr> +<tr class="odd"> +<td class="param_name">maxirconst</td><td class="param_default">500</td><td class="param_desc">Max. number of IR constants of a trace</td></tr> +<tr class="even"> +<td class="param_name">maxside</td><td class="param_default">100</td><td class="param_desc">Max. number of side traces of a root trace</td></tr> +<tr class="odd"> +<td class="param_name">maxsnap</td><td class="param_default">500</td><td class="param_desc">Max. 
number of snapshots for a trace</td></tr> +<tr class="even separate"> +<td class="param_name">hotloop</td><td class="param_default">56</td><td class="param_desc">Number of iterations to detect a hot loop or hot call</td></tr> +<tr class="odd"> +<td class="param_name">hotexit</td><td class="param_default">10</td><td class="param_desc">Number of taken exits to start a side trace</td></tr> +<tr class="even"> +<td class="param_name">tryside</td><td class="param_default">4</td><td class="param_desc">Number of attempts to compile a side trace</td></tr> +<tr class="odd separate"> +<td class="param_name">instunroll</td><td class="param_default">4</td><td class="param_desc">Max. unroll factor for instable loops</td></tr> +<tr class="even"> +<td class="param_name">loopunroll</td><td class="param_default">15</td><td class="param_desc">Max. unroll factor for loop ops in side traces</td></tr> +<tr class="odd"> +<td class="param_name">callunroll</td><td class="param_default">3</td><td class="param_desc">Max. unroll factor for pseudo-recursive calls</td></tr> +<tr class="even"> +<td class="param_name">recunroll</td><td class="param_default">2</td><td class="param_desc">Min. unroll factor for true recursion</td></tr> +<tr class="odd separate"> +<td class="param_name">sizemcode</td><td class="param_default">32</td><td class="param_desc">Size of each machine code area in KBytes (Windows: 64K)</td></tr> +<tr class="even"> +<td class="param_name">maxmcode</td><td class="param_default">512</td><td class="param_desc">Max. 
total size of all machine code areas in KBytes</td></tr> +</table> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/doc/status.html b/lib/LuaJIT/doc/status.html new file mode 100644 index 0000000..e0d912e --- /dev/null +++ b/lib/LuaJIT/doc/status.html @@ -0,0 +1,122 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> +<title>Status</title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta name="Copyright" content="Copyright (C) 2005-2018"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +ul li { padding-bottom: 0.3em; } +</style> +</head> +<body> +<div id="site"> +<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Status</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="http://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a class="current" href="status.html">Status</a> +<ul><li> +<a href="changes.html">Changes</a> +</li></ul> +</li><li> +<a href="faq.html">FAQ</a> 
+</li><li> +<a href="http://luajit.org/performance.html">Performance <span class="ext">»</span></a> +</li><li> +<a href="http://wiki.luajit.org/">Wiki <span class="ext">»</span></a> +</li><li> +<a href="http://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +<span style="color: #0000c0;">LuaJIT 2.0</span> is the current +<span style="color: #0000c0;">stable branch</span>. This branch is in +feature-freeze — new features will only be added to LuaJIT 2.1. +</p> + +<h2>Current Status</h2> +<p> +LuaJIT ought to run all Lua 5.1-compatible source code just fine. +It's considered a serious bug if the VM crashes or produces unexpected +results — please report this. +</p> +<p> +Known incompatibilities and issues in LuaJIT 2.0: +</p> +<ul> +<li> +There are some differences in <b>implementation-defined</b> behavior. +These either have a good reason, are arbitrary design choices +or are due to quirks in the VM. The latter cases may get fixed if a +demonstrable need is shown. +</li> +<li> +The Lua <b>debug API</b> is missing a couple of features (return +hooks for non-Lua functions) and shows slightly different behavior +in LuaJIT (no per-coroutine hooks, no tail call counting). +</li> +<li> +Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not +handled correctly. The error may fall through an on-trace +<tt>pcall</tt> or it may be passed on to the function set with +<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new +garbage collector. +</li> +<li> +LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the +<b>legacy <tt>lightuserdata</tt></b> data type. +This is only relevant on x64 systems which use the negative part of the +virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems +configured with a 48 bit or 52 bit VA. +Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside +of that range, e.g. variables on the stack. 
In general, avoid this data +type for new code and replace it with (much more performant) FFI bindings. +FFI cdata pointers can address the full 64 bit range. +</li> +</ul> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2018 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/lib/LuaJIT/dynasm/dasm_arm.h b/lib/LuaJIT/dynasm/dasm_arm.h new file mode 100644 index 0000000..1d404cc --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_arm.h @@ -0,0 +1,458 @@ +/* +** DynASM ARM encoding engine. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "arm" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, + DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). 
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; /* Size of lglabels in bytes. */ + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; /* Size of pclabels in bytes. */ + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state: allocate room for maxsection sections and reset all label arrays and section buffers. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; /* Kept so dasm_free() can hand the size back to DASM_M_FREE. */ + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. 
*/ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state: all allocated section buffers, both label arrays, then the state itself. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); /* Room for 10 int slots plus one per global. */ +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; /* Size in bytes before growing. */ + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); /* Zero only the bytes between the old and the new size. */ +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; /* Section 0 is the active section initially. */ + memset((void *)D->lglabels, 0, D->lgsize); /* No NULL check here, unlike pclabels below. */ + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); /* Rewind to the biased start of section i. */ + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif +/* Try all 16 even left-rotations of n; if a rotated value fits in 8 bits, return it with the rotation index placed from bit 8 upward, otherwise return -1. */ +static int dasm_imm12(unsigned int n) +{ + int i; + for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30)) + if (n <= 255) return (int)(n + (i << 8)); + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { /* Buffer nearly full: grow it, then rebase rbuf and epos. */ + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; /* Record the action list offset this put starts at. */ + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { /* Not an action code: counts 4 bytes of output. */ + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; /* Only actions from DASM_REL_PC upward consume a vararg. */ + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; /* Skip the escaped word; it still counts 4 bytes. */ + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; /* Add the full alignment mask for now; pass 2 shrinks aligns. */ + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: + case DASM_IMM16: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + if ((ins & 0x8000)) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMV8: + CK((n & 3) == 0, RANGE_I); + n >>= 2; /* Stored divided by 4; the check above requires a multiple of 4. */ + /* fallthrough */ + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? 
((n>>((ins>>5)&31)) == 0) : + (((-n)>>((ins>>5)&31)) == 0), RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; /* Written back to the section that was active on entry. */ + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; /* Adjust the stored pass 1 estimate by the accumulated offset. */ + case DASM_IMM: case DASM_IMM12: case DASM_IMM16: + case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. 
*/ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; + patchrel: + if ((ins & 0x800) == 0) { + CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL); + cp[-1] |= ((n >> 2) & 0x00ffffff); + } else if ((ins & 0x1000)) { + CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); + goto patchimml8; + } else if ((ins & 0x2000) == 0) { + CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); + goto patchimml; + } else { + CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL); + n >>= 2; + goto patchimml; + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= 
((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM12: + cp[-1] |= dasm_imm12((unsigned int)n); + break; + case DASM_IMM16: + cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff); + break; + case DASM_IMML8: patchimml8: + cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) : + ((-n & 0x0f) | ((-n & 0xf0) << 4)); + break; + case DASM_IMML12: case DASM_IMMV8: patchimml: + cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/lib/LuaJIT/dynasm/dasm_arm.lua b/lib/LuaJIT/dynasm/dasm_arm.lua new file mode 100644 index 0000000..32f595a --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_arm.lua @@ -0,0 +1,1125 @@ +------------------------------------------------------------------------------ +-- DynASM ARM module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. 
------------------------------------------------------------------------------

-- Static description of this backend, exposed to the caller through _M._info.
local _info = {
  arch = "arm",
  description = "DynASM ARM module",
  version = "1.4.0",
  vernum = 10400,
  release = "2015-10-18",
  author = "Mike Pall",
  license = "MIT",
}

-- Table of glue functions exported by this arch-specific module.
local _M = {}
_M._info = _info

-- Local aliases for the library functions used throughout the module.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, setmetatable, rawget = assert, setmetatable, rawget
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
local concat, sort, insert = table.concat, table.sort, table.insert
local bit = bit or require("bit")
local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
local ror, tohex = bit.ror, bit.tohex

-- Tables and callbacks inherited from the front end (unset at this point).
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Names of the encoding engine actions, in action-number order.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8",
}

-- Upper bound on section buffer positions consumed by one dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Reverse lookup: action name -> zero-based action number.
local map_action = {}
for idx = 1, #action_names do
  map_action[action_names[idx]] = idx - 1
end

-- Buffer collecting the words of the action list.
local actlist = {}

-- Arguments for the next dasm_put(). The first one is the offset into the
-- action list, which starts out as 0.
local actargs = { 0 }

-- Number of section buffer positions used so far by the next dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Dump action names and numbers.
-- Writes a heading, then one line per entry of action_names showing the
-- name and its action number in hex and in decimal, then a blank line.
--   out: object with a :write() method (e.g. a file handle).
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
--   out:  object with a :write() method.
--   name: C identifier used for the emitted array.
-- An empty action list is emitted as a one-element array holding a single
-- STOP word, so the generated C array is never zero-sized.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  -- Lua arrays are 1-based: the placeholder has to stand in for element 1.
  -- (Storing it in slot 0 left actlist[1] nil and made tohex() raise.)
  -- It is kept in a local so the shared action list is left untouched.
  if nn == 0 then nn, last = 1, map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end

------------------------------------------------------------------------------

-- Add word to action list.
--   n: integral value in the range 0 .. 0xffffffff; anything else trips the
--      assertion.
local function wputxw(n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
--   action: action name, must be a key of map_action.
--   val:    value for the low 16 bits of the action word (default 0).
--   a:      optional argument appended to the pending dasm_put() call.
--   num:    number of buffer positions to account for (default 1); the
--           position counter only advances if `a` or `num` is given.
local function waction(action, val, a, num)
  local w = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(w * 0x10000 + (val or 0))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Flush action list (intervening C code or buffer pos overflow).
--   term: if true, no terminating STOP action is appended before flushing.
local function wflush(term)
  if #actlist == actargs[1] then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped word.
-- Words up to 0x000fffff are preceded by an ESC action; presumably this keeps
-- the C encoder, which treats small upper halfwords as action codes, from
-- misreading them.
local function wputw(n)
  if n <= 0x000fffff then waction("ESC") end
  wputxw(n)
end

-- Reserve position for word.
-- Appends an empty-string placeholder to the action list and returns its
-- index, to be filled in later.
local function wpos()
  local pos = #actlist+1
  actlist[pos] = ""
  return pos
end

-- Store word to reserved position.
+local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. 
+ local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { sp = "r13", lr = "r14", pc = "r15", } + +-- Int. register name -> ext. name. +local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, } + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +-- Template strings for ARM instructions. +local map_op = { + -- Basic data processing instructions. 
+ and_3 = "e0000000DNPs", + eor_3 = "e0200000DNPs", + sub_3 = "e0400000DNPs", + rsb_3 = "e0600000DNPs", + add_3 = "e0800000DNPs", + adc_3 = "e0a00000DNPs", + sbc_3 = "e0c00000DNPs", + rsc_3 = "e0e00000DNPs", + tst_2 = "e1100000NP", + teq_2 = "e1300000NP", + cmp_2 = "e1500000NP", + cmn_2 = "e1700000NP", + orr_3 = "e1800000DNPs", + mov_2 = "e1a00000DPs", + bic_3 = "e1c00000DNPs", + mvn_2 = "e1e00000DPs", + + and_4 = "e0000000DNMps", + eor_4 = "e0200000DNMps", + sub_4 = "e0400000DNMps", + rsb_4 = "e0600000DNMps", + add_4 = "e0800000DNMps", + adc_4 = "e0a00000DNMps", + sbc_4 = "e0c00000DNMps", + rsc_4 = "e0e00000DNMps", + tst_3 = "e1100000NMp", + teq_3 = "e1300000NMp", + cmp_3 = "e1500000NMp", + cmn_3 = "e1700000NMp", + orr_4 = "e1800000DNMps", + mov_3 = "e1a00000DMps", + bic_4 = "e1c00000DNMps", + mvn_3 = "e1e00000DMps", + + lsl_3 = "e1a00000DMws", + lsr_3 = "e1a00020DMws", + asr_3 = "e1a00040DMws", + ror_3 = "e1a00060DMws", + rrx_2 = "e1a00060DMs", + + -- Multiply and multiply-accumulate. + mul_3 = "e0000090NMSs", + mla_4 = "e0200090NMSDs", + umaal_4 = "e0400090DNMSs", -- v6 + mls_4 = "e0600090DNMSs", -- v6T2 + umull_4 = "e0800090DNMSs", + umlal_4 = "e0a00090DNMSs", + smull_4 = "e0c00090DNMSs", + smlal_4 = "e0e00090DNMSs", + + -- Halfword multiply and multiply-accumulate. + smlabb_4 = "e1000080NMSD", -- v5TE + smlatb_4 = "e10000a0NMSD", -- v5TE + smlabt_4 = "e10000c0NMSD", -- v5TE + smlatt_4 = "e10000e0NMSD", -- v5TE + smlawb_4 = "e1200080NMSD", -- v5TE + smulwb_3 = "e12000a0NMS", -- v5TE + smlawt_4 = "e12000c0NMSD", -- v5TE + smulwt_3 = "e12000e0NMS", -- v5TE + smlalbb_4 = "e1400080NMSD", -- v5TE + smlaltb_4 = "e14000a0NMSD", -- v5TE + smlalbt_4 = "e14000c0NMSD", -- v5TE + smlaltt_4 = "e14000e0NMSD", -- v5TE + smulbb_3 = "e1600080NMS", -- v5TE + smultb_3 = "e16000a0NMS", -- v5TE + smulbt_3 = "e16000c0NMS", -- v5TE + smultt_3 = "e16000e0NMS", -- v5TE + + -- Miscellaneous data processing instructions. 
+ clz_2 = "e16f0f10DM", -- v5T + rev_2 = "e6bf0f30DM", -- v6 + rev16_2 = "e6bf0fb0DM", -- v6 + revsh_2 = "e6ff0fb0DM", -- v6 + sel_3 = "e6800fb0DNM", -- v6 + usad8_3 = "e780f010NMS", -- v6 + usada8_4 = "e7800010NMSD", -- v6 + rbit_2 = "e6ff0f30DM", -- v6T2 + movw_2 = "e3000000DW", -- v6T2 + movt_2 = "e3400000DW", -- v6T2 + -- Note: the X encodes width-1, not width. + sbfx_4 = "e7a00050DMvX", -- v6T2 + ubfx_4 = "e7e00050DMvX", -- v6T2 + -- Note: the X encodes the msb field, not the width. + bfc_3 = "e7c0001fDvX", -- v6T2 + bfi_4 = "e7c00010DMvX", -- v6T2 + + -- Packing and unpacking instructions. + pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6 + pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6 + sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6 + sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6 + sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6 + sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6 + sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6 + sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6 + uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6 + uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6 + uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6 + uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6 + uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6 + uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6 + + -- Saturating instructions. + qadd_3 = "e1000050DMN", -- v5TE + qsub_3 = "e1200050DMN", -- v5TE + qdadd_3 = "e1400050DMN", -- v5TE + qdsub_3 = "e1600050DMN", -- v5TE + -- Note: the X for ssat* encodes sat_imm-1, not sat_imm. + ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6 + usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6 + ssat16_3 = "e6a00f30DXM", -- v6 + usat16_3 = "e6e00f30DXM", -- v6 + + -- Parallel addition and subtraction. 
+ sadd16_3 = "e6100f10DNM", -- v6 + sasx_3 = "e6100f30DNM", -- v6 + ssax_3 = "e6100f50DNM", -- v6 + ssub16_3 = "e6100f70DNM", -- v6 + sadd8_3 = "e6100f90DNM", -- v6 + ssub8_3 = "e6100ff0DNM", -- v6 + qadd16_3 = "e6200f10DNM", -- v6 + qasx_3 = "e6200f30DNM", -- v6 + qsax_3 = "e6200f50DNM", -- v6 + qsub16_3 = "e6200f70DNM", -- v6 + qadd8_3 = "e6200f90DNM", -- v6 + qsub8_3 = "e6200ff0DNM", -- v6 + shadd16_3 = "e6300f10DNM", -- v6 + shasx_3 = "e6300f30DNM", -- v6 + shsax_3 = "e6300f50DNM", -- v6 + shsub16_3 = "e6300f70DNM", -- v6 + shadd8_3 = "e6300f90DNM", -- v6 + shsub8_3 = "e6300ff0DNM", -- v6 + uadd16_3 = "e6500f10DNM", -- v6 + uasx_3 = "e6500f30DNM", -- v6 + usax_3 = "e6500f50DNM", -- v6 + usub16_3 = "e6500f70DNM", -- v6 + uadd8_3 = "e6500f90DNM", -- v6 + usub8_3 = "e6500ff0DNM", -- v6 + uqadd16_3 = "e6600f10DNM", -- v6 + uqasx_3 = "e6600f30DNM", -- v6 + uqsax_3 = "e6600f50DNM", -- v6 + uqsub16_3 = "e6600f70DNM", -- v6 + uqadd8_3 = "e6600f90DNM", -- v6 + uqsub8_3 = "e6600ff0DNM", -- v6 + uhadd16_3 = "e6700f10DNM", -- v6 + uhasx_3 = "e6700f30DNM", -- v6 + uhsax_3 = "e6700f50DNM", -- v6 + uhsub16_3 = "e6700f70DNM", -- v6 + uhadd8_3 = "e6700f90DNM", -- v6 + uhsub8_3 = "e6700ff0DNM", -- v6 + + -- Load/store instructions. 
+ str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL", + strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL", + ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL", + ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL", + strh_2 = "e00000b0DL", strh_3 = "e00000b0DL", + ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL", + ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE + ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL", + strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE + ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL", + + ldm_2 = "e8900000oR", ldmia_2 = "e8900000oR", ldmfd_2 = "e8900000oR", + ldmda_2 = "e8100000oR", ldmfa_2 = "e8100000oR", + ldmdb_2 = "e9100000oR", ldmea_2 = "e9100000oR", + ldmib_2 = "e9900000oR", ldmed_2 = "e9900000oR", + stm_2 = "e8800000oR", stmia_2 = "e8800000oR", stmfd_2 = "e8800000oR", + stmda_2 = "e8000000oR", stmfa_2 = "e8000000oR", + stmdb_2 = "e9000000oR", stmea_2 = "e9000000oR", + stmib_2 = "e9800000oR", stmed_2 = "e9800000oR", + pop_1 = "e8bd0000R", push_1 = "e92d0000R", + + -- Branch instructions. + b_1 = "ea000000B", + bl_1 = "eb000000B", + blx_1 = "e12fff30C", + bx_1 = "e12fff10M", + + -- Miscellaneous instructions. + nop_0 = "e1a00000", + mrs_1 = "e10f0000D", + bkpt_1 = "e1200070K", -- v5T + svc_1 = "ef000000T", swi_1 = "ef000000T", + ud_0 = "e7f001f0", + + -- VFP instructions. 
+ ["vadd.f32_3"] = "ee300a00dnm", + ["vadd.f64_3"] = "ee300b00Gdnm", + ["vsub.f32_3"] = "ee300a40dnm", + ["vsub.f64_3"] = "ee300b40Gdnm", + ["vmul.f32_3"] = "ee200a00dnm", + ["vmul.f64_3"] = "ee200b00Gdnm", + ["vnmul.f32_3"] = "ee200a40dnm", + ["vnmul.f64_3"] = "ee200b40Gdnm", + ["vmla.f32_3"] = "ee000a00dnm", + ["vmla.f64_3"] = "ee000b00Gdnm", + ["vmls.f32_3"] = "ee000a40dnm", + ["vmls.f64_3"] = "ee000b40Gdnm", + ["vnmla.f32_3"] = "ee100a40dnm", + ["vnmla.f64_3"] = "ee100b40Gdnm", + ["vnmls.f32_3"] = "ee100a00dnm", + ["vnmls.f64_3"] = "ee100b00Gdnm", + ["vdiv.f32_3"] = "ee800a00dnm", + ["vdiv.f64_3"] = "ee800b00Gdnm", + + ["vabs.f32_2"] = "eeb00ac0dm", + ["vabs.f64_2"] = "eeb00bc0Gdm", + ["vneg.f32_2"] = "eeb10a40dm", + ["vneg.f64_2"] = "eeb10b40Gdm", + ["vsqrt.f32_2"] = "eeb10ac0dm", + ["vsqrt.f64_2"] = "eeb10bc0Gdm", + ["vcmp.f32_2"] = "eeb40a40dm", + ["vcmp.f64_2"] = "eeb40b40Gdm", + ["vcmpe.f32_2"] = "eeb40ac0dm", + ["vcmpe.f64_2"] = "eeb40bc0Gdm", + ["vcmpz.f32_1"] = "eeb50a40d", + ["vcmpz.f64_1"] = "eeb50b40Gd", + ["vcmpze.f32_1"] = "eeb50ac0d", + ["vcmpze.f64_1"] = "eeb50bc0Gd", + + vldr_2 = "ed100a00dl|ed100b00Gdl", + vstr_2 = "ed000a00dl|ed000b00Gdl", + vldm_2 = "ec900a00or", + vldmia_2 = "ec900a00or", + vldmdb_2 = "ed100a00or", + vpop_1 = "ecbd0a00r", + vstm_2 = "ec800a00or", + vstmia_2 = "ec800a00or", + vstmdb_2 = "ed000a00or", + vpush_1 = "ed2d0a00r", + + ["vmov.f32_2"] = "eeb00a40dm|eeb00a00dY", -- #imm is VFPv3 only + ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", -- #imm is VFPv3 only + vmov_2 = "ee100a10Dn|ee000a10nD", + vmov_3 = "ec500a10DNm|ec400a10mDN|ec500b10GDNm|ec400b10GmDN", + + vmrs_0 = "eef1fa10", + vmrs_1 = "eef10a10D", + vmsr_1 = "eee10a10D", + + ["vcvt.s32.f32_2"] = "eebd0ac0dm", + ["vcvt.s32.f64_2"] = "eebd0bc0dGm", + ["vcvt.u32.f32_2"] = "eebc0ac0dm", + ["vcvt.u32.f64_2"] = "eebc0bc0dGm", + ["vcvtr.s32.f32_2"] = "eebd0a40dm", + ["vcvtr.s32.f64_2"] = "eebd0b40dGm", + ["vcvtr.u32.f32_2"] = "eebc0a40dm", + ["vcvtr.u32.f64_2"] = 
"eebc0b40dGm", + ["vcvt.f32.s32_2"] = "eeb80ac0dm", + ["vcvt.f64.s32_2"] = "eeb80bc0GdFm", + ["vcvt.f32.u32_2"] = "eeb80a40dm", + ["vcvt.f64.u32_2"] = "eeb80b40GdFm", + ["vcvt.f32.f64_2"] = "eeb70bc0dGm", + ["vcvt.f64.f32_2"] = "eeb70ac0GdFm", + + -- VFPv4 only: + ["vfma.f32_3"] = "eea00a00dnm", + ["vfma.f64_3"] = "eea00b00Gdnm", + ["vfms.f32_3"] = "eea00a40dnm", + ["vfms.f64_3"] = "eea00b40Gdnm", + ["vfnma.f32_3"] = "ee900a40dnm", + ["vfnma.f64_3"] = "ee900b40Gdnm", + ["vfnms.f32_3"] = "ee900a00dnm", + ["vfnms.f64_3"] = "ee900b00Gdnm", + + -- NYI: Advanced SIMD instructions. + + -- NYI: I have no need for these instructions right now: + -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh + -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe + -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb + -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2 +} + +-- Add mnemonics for "s" variants. +do + local t = {} + for k,v in pairs(map_op) do + if sub(v, -1) == "s" then + local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2) + t[sub(k, 1, -3).."s"..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r(1?[0-9])$") + if r then + r = tonumber(r) + if r <= 15 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_gpr_pm(expr) + local pm, expr2 = match(expr, "^([+-]?)(.*)$") + return parse_gpr(expr2), (pm == "-") +end + +local function parse_vr(expr, tp) + local t, r = match(expr, "^([sd])([0-9]+)$") + if t == tp then + r = tonumber(r) + if r <= 31 then + if t == 
"s" then return shr(r, 1), band(r, 1) end + return band(r, 15), shr(r, 4) + end + end + werror("bad register name `"..expr.."'") +end + +local function parse_reglist(reglist) + reglist = match(reglist, "^{%s*([^}]*)}$") + if not reglist then werror("register list expected") end + local rr = 0 + for p in gmatch(reglist..",", "%s*([^,]*),") do + local rbit = shl(1, parse_gpr(gsub(p, "%s+$", ""))) + if band(rr, rbit) ~= 0 then + werror("duplicate register `"..p.."'") + end + rr = rr + rbit + end + return rr +end + +local function parse_vrlist(reglist) + local ta, ra, tb, rb = match(reglist, + "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$") + ra, rb = tonumber(ra), tonumber(rb) + if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then + local nr = rb+1 - ra + if ta == "s" then + return shl(shr(ra,1),12)+shl(band(ra,1),22) + nr + else + return shl(band(ra,15),12)+shl(shr(ra,4),22) + nr*2 + 0x100 + end + end + werror("register list expected") +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + local n = tonumber(imm) + if n then + local m = band(n) + for i=0,-15,-1 do + if shr(m, 8) == 0 then return m + shl(band(i, 15), 8) end + m = ror(m, 2) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm16(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate 
operand") end + local n = tonumber(imm) + if n then + if shr(n, 16) == 0 then return band(n, 0x0fff) + shl(band(n, 0xf000), 4) end + werror("out of range immediate `"..imm.."'") + else + waction("IMM16", 32*16, imm) + return 0 + end +end + +local function parse_imm_load(imm, ext) + local n = tonumber(imm) + if n then + if ext then + if n >= -255 and n <= 255 then + local up = 0x00800000 + if n < 0 then n = -n; up = 0 end + return shl(band(n, 0xf0), 4) + band(n, 0x0f) + up + end + else + if n >= -4095 and n <= 4095 then + if n >= 0 then return n+0x00800000 end + return -n + end + end + werror("out of range immediate `"..imm.."'") + else + waction(ext and "IMML8" or "IMML12", 32768 + shl(ext and 8 or 12, 5), imm) + return 0 + end +end + +local function parse_shift(shift, gprok) + if shift == "rrx" then + return 3 * 32 + else + local s, s2 = match(shift, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + if sub(s2, 1, 1) == "#" then + return parse_imm(s2, 5, 7, 0, false) + shl(s, 5) + else + if not gprok then werror("expected immediate shift operand") end + return shl(parse_gpr(s2), 8) + shl(s, 5) + 16 + end + end +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +local function parse_load(params, nparams, n, op) + local oplo = band(op, 255) + local ext, ldrd = (oplo ~= 0), (oplo == 208) + local d + if (ldrd or oplo == 240) then + d = band(shr(op, 12), 15) + if band(d, 1) ~= 0 then werror("odd destination register") end + end + local pn = params[n] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + local p2 = params[n+1] + if not p1 then + if not p2 then + if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then + local mode, n, s = parse_label(pn, false) + waction("REL_"..mode, n + (ext and 0x1800 or 0x0800), s, 1) + return op + 15 * 65536 + 0x01000000 + (ext and 0x00400000 or 0) + end + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), + format(tp.ctypefmt, tailr)) + return op + shl(d, 16) + 0x01000000 + (ext and 0x00400000 or 0) + end + end + end + werror("expected address operand") + end + if wb == "!" then op = op + 0x00200000 end + if p2 then + if wb == "!" 
then werror("bad use of '!'") end + local p3 = params[n+2] + op = op + shl(parse_gpr(p1), 16) + local imm = match(p2, "^#(.*)$") + if imm then + local m = parse_imm_load(imm, ext) + if p3 then werror("too many parameters") end + op = op + m + (ext and 0x00400000 or 0) + else + local m, neg = parse_gpr_pm(p2) + if ldrd and (m == d or m-1 == d) then werror("register conflict") end + op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) + if p3 then op = op + parse_shift(p3) end + end + else + local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + shl(parse_gpr(p1a), 16) + 0x01000000 + if p2 ~= "" then + local imm = match(p2, "^,%s*#(.*)$") + if imm then + local m = parse_imm_load(imm, ext) + op = op + m + (ext and 0x00400000 or 0) + else + local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$") + local m, neg = parse_gpr_pm(p2a) + if ldrd and (m == d or m-1 == d) then werror("register conflict") end + op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) + if p3 ~= "" then + if ext then werror("too many parameters") end + op = op + parse_shift(p3) + end + end + else + if wb == "!" 
then werror("bad use of '!'") end + op = op + (ext and 0x00c00000 or 0x00800000) + end + end + return op +end + +local function parse_vload(q) + local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$") + if reg then + local d = shl(parse_gpr(reg), 16) + if imm == "" then return d end + imm = match(imm, "^,%s*#(.*)$") + if imm then + local n = tonumber(imm) + if n then + if n >= -1020 and n <= 1020 and n%4 == 0 then + return d + (n >= 0 and n/4+0x00800000 or -n/4) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMMV8", 32768 + 32*8, imm) + return d + end + end + else + if match(q, "^[<>=%-]") or match(q, "^extern%s+") then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n + 0x2800, s, 1) + return 15 * 65536 + end + local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr)) + return shl(d, 16) + end + end + end + werror("expected address operand") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + local vr = "s" + + -- Process each character. 
+ for p in gmatch(sub(template, 9), ".") do + local q = params[n] + if p == "D" then + op = op + shl(parse_gpr(q), 12); n = n + 1 + elseif p == "N" then + op = op + shl(parse_gpr(q), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(q), 8); n = n + 1 + elseif p == "M" then + op = op + parse_gpr(q); n = n + 1 + elseif p == "d" then + local r,h = parse_vr(q, vr); op = op+shl(r,12)+shl(h,22); n = n + 1 + elseif p == "n" then + local r,h = parse_vr(q, vr); op = op+shl(r,16)+shl(h,7); n = n + 1 + elseif p == "m" then + local r,h = parse_vr(q, vr); op = op+r+shl(h,5); n = n + 1 + elseif p == "P" then + local imm = match(q, "^#(.*)$") + if imm then + op = op + parse_imm12(imm) + 0x02000000 + else + op = op + parse_gpr(q) + end + n = n + 1 + elseif p == "p" then + op = op + parse_shift(q, true); n = n + 1 + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "l" then + op = op + parse_vload(q) + elseif p == "B" then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + elseif p == "C" then -- blx gpr vs. blx label. + if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then + op = op + parse_gpr(q) + else + if op < 0xe0000000 then werror("unconditional instruction") end + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + op = 0xfa000000 + end + elseif p == "F" then + vr = "s" + elseif p == "G" then + vr = "d" + elseif p == "o" then + local r, wb = match(q, "^([^!]*)(!?)$") + op = op + shl(parse_gpr(r), 16) + (wb == "!" 
-- Generic handler for opcodes defined by template strings.
-- Called without params it returns the template with every run of eight hex
-- digits (the opcode words) removed. Otherwise it tries each "|"-separated
-- alternative in turn via parse_template() and keeps the first one that
-- parses without raising an error.
map_op[".template__"] = function(params, template, nparams)
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 3 positions.
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()
  -- Remember the current list lengths so a failed alternative can be undone.
  local lpos, apos, spos = #actlist, #actargs, secpos

  local ok, err
  for t in gmatch(template, "[^|]+") do
    ok, err = pcall(parse_template, params, t, nparams, pos)
    if ok then return end
    -- Roll back whatever the failed attempt appended (at most 3 entries each)
    -- before trying the next alternative.
    secpos = spos
    actlist[lpos+1] = nil
    actlist[lpos+2] = nil
    actlist[lpos+3] = nil
    actargs[apos+1] = nil
    actargs[apos+2] = nil
    actargs[apos+3] = nil
  end
  -- No alternative matched: re-raise the last error without position info.
  error(err, 0)
end
-- Alignment pseudo-opcode.
-- Accepts a single numeric argument that must be one of the powers of two
-- 2, 4, ..., 256 and emits an ALIGN action carrying the mask (align-1).
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  -- Compare against each permitted power of two in turn.
  local pow = 2
  while align and pow <= 256 do
    if align == pow then
      waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
      return
    end
    pow = pow * 2
  end
  werror("bad alignment")
end
+ local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. 
-- Merge the core maps and the arch-specific maps.
-- Installs a fallback lookup on map_op: core opcodes win; otherwise a
-- two-character condition code is split off the opcode name and, when the
-- unconditional template exists, a conditional template is derived from it.
-- map_def falls back to map_archdef. Returns map_op and map_def.
function _M.mergemaps(map_coreop, map_def)
  local function lookup(ops, opname)
    local core = map_coreop[opname]
    if core then return core end
    -- Split "<stem><cc><suffix>", where the suffix starts with "." or "_".
    local stem, cc, suffix = match(opname, "^(.-)(..)([._].*)$")
    local cond = map_cond[cc]
    if not cond then return nil end
    local tmpl = rawget(ops, stem..suffix)
    if type(tmpl) ~= "string" then return nil end
    -- Replace the leading condition digit of every alternative.
    local hexcc = format("%x", cond)
    return gsub(hexcc..sub(tmpl, 2), "|e", "|"..hexcc)
  end
  setmetatable(map_op, { __index = lookup })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end
*/ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. 
*/ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
/* Encode a 12 bit immediate with optional 12 bit left shift.
** Returns the value itself if it fits into bits 0..11, the value shifted
** down by 12 with bit 12 set if only bits 12..23 are used, or -1 if the
** value fits neither form.
*/
static int dasm_imm12(unsigned int n)
{
  if ((n >> 12) == 0)
    return (int)n;  /* Plain 12 bit value. */
  if ((n & 0xff000fffu) != 0)
    return -1;  /* Bits outside 12..23 are set: not encodable. */
  return (int)((n >> 12) | 0x1000u);  /* Shifted form. */
}
/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
**
** 'start' is the offset into the action list; the variadic arguments are
** consumed as ints, one per action numbered DASM_REL_PC or above (two for
** DASM_IMM13X). The section buffer receives the start offset followed by
** one slot per position-consuming action.
**
** NOTE(review): with DASM_CHECKS defined, CK/CKPL leave the function via a
** plain 'return' after va_start() without reaching va_end() -- confirm this
** is acceptable on all supported targets.
*/
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs;
  int *b;

  /* Grow the section buffer when fewer than DASM_MAXSECPOS slots remain. */
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }

  b = sec->rbuf;
  b[pos++] = start;  /* The action list offset occupies the first slot. */

  va_start(ap, start);
  while (1) {
    unsigned int ins = *p++;
    unsigned int action = (ins >> 16);
    if (action >= DASM__MAX) {
      /* Not an action: a plain instruction word of 4 bytes. */
      ofs += 4;
    } else {
      /* Actions from DASM_REL_PC onwards take an int argument. */
      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
      switch (action) {
      case DASM_STOP: goto stop;
      case DASM_SECTION:
        n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
        D->section = &D->sections[n]; goto stop;
      case DASM_ESC: p++; ofs += 4; break;  /* Skip the escaped word. */
      case DASM_REL_EXT: break;
      /* Assume the worst case padding for now; dasm_link() shrinks it. */
      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
      case DASM_REL_LG:
        n = (ins & 2047) - 10; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl += 10; n = *pl;
        if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) {  /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
      linkrel:
          b[pos] = n;  /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        break;
      case DASM_LABEL_LG:
        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putlabel:
        n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
        }
        *pl = -pos;  /* Label exists now. */
        b[pos++] = ofs;  /* Store pass1 offset estimate. */
        break;
      case DASM_IMM:
        /* Low 'scale' bits must be zero; the value is stored pre-shifted. */
        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
        n >>= ((ins>>10)&31);
#ifdef DASM_CHECKS
        if ((ins & 0x8000))
          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
        else
          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
        b[pos++] = n;
        break;
      case DASM_IMM6:
        CK((n >> 6) == 0, RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM12:
        CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM13W:
        CK(dasm_imm13(n, n) != -1, RANGE_I);
        b[pos++] = n;
        break;
      case DASM_IMM13X: {
        /* Second argument is passed to dasm_imm13() as the 'hi' word. */
        int m = va_arg(ap, int);
        CK(dasm_imm13(n, m) != -1, RANGE_I);
        b[pos++] = n;
        b[pos++] = m;
        break;
        }
      case DASM_IMML: {
#ifdef DASM_CHECKS
        /* Scale comes from the top two bits of the preceding instruction. */
        int scale = (p[-2] >> 30);
        CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
           (unsigned int)(n+256) < 512, RANGE_I);
#endif
        b[pos++] = n;
        break;
        }
      }
    }
  }
stop:
  va_end(ap);
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: + case DASM_IMML: pos++; break; + case DASM_IMM13X: pos += 2; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; + patchrel: + if (!(ins & 0xf800)) { /* B, BL */ + CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL); + cp[-1] |= ((n >> 2) & 0x03ffffff); + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x00ffffe0); + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + CK(((n+0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29); + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29); + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x0007ffe0); + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM6: + cp[-1] |= ((n&31) << 19) | ((n&32) << 26); + break; + case DASM_IMM12: + cp[-1] |= (dasm_imm12((unsigned int)n) << 10); + break; + case DASM_IMM13W: + cp[-1] |= (dasm_imm13(n, n) << 10); + break; + case DASM_IMM13X: + cp[-1] |= (dasm_imm13(n, *b++) << 10); + break; + case DASM_IMML: { + int scale = (p[-2] >> 30); + cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ? 
+ ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); + break; + } + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/lib/LuaJIT/dynasm/dasm_arm64.lua b/lib/LuaJIT/dynasm/dasm_arm64.lua new file mode 100644 index 0000000..8a5f735 --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_arm64.lua @@ -0,0 +1,1166 @@ +------------------------------------------------------------------------------ +-- DynASM ARM64 module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM64 module", + version = "1.4.0", + vernum = 10400, + release = "2015-10-18", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. 
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex = bit.ror, bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. 
-- Write action list buffer as a huge static C array.
-- out:  output object with a write() method.
-- name: C identifier for the generated array.
-- An empty action list is emitted as a one-element array holding a single
-- STOP action. The placeholder is kept in a local instead of being stored in
-- actlist: the old code stored it at index 0 but then read index 1, which
-- passed nil to tohex() and raised an error.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  if nn == 0 then nn = 1; last = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The final element is written without a trailing comma.
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbering starts at 20 and is capped at 2047.
local next_global = 20
local map_global = setmetatable({}, { __index = function(t, name)
  -- Names must look like identifiers.
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local n = next_global
  if n > 2047 then werror("too many global labels") end
  next_global = n + 1
  -- Cache the assignment so later lookups bypass this handler.
  t[name] = n
  return n
end})
-- Parse a register operand and return its number plus the type entry (if
-- the operand named a type from map_type).
-- Accepted forms: a plain register ([xwqdshb] followed by a number), a type
-- name with a default register, or "type:reg" with an explicit override.
-- Register number 31 is only accepted for x/w when written with the "@"
-- prefix; for the other register classes it is accepted as-is.
-- The register class of the first operand is remembered in parse_reg_type
-- and later operands must use the same class.
local function parse_reg(expr)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    -- The pattern admits numbers up to 39. The special cases below apply to
    -- register 31 only; previously the missing parentheses let any non-x/w
    -- register numbered 32..39 through, overflowing the 5 bit field.
    if r <= 30 or
       (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return r, tp
    end
  end
  werror("bad register name `"..expr.."'")
end
-- Parse a "#imm" operand for the 12 bit immediate field (optionally shifted
-- left by 12). Returns the encoded bits for a literal value, or emits an
-- IMM12 action and returns 0 when the value is not known at assembly time.
local function parse_imm12(imm)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local val = parse_number(expr)
  if not val then
    waction("IMM12", 0, expr)
    return 0
  end
  -- Plain form: value fits in bits 0..11.
  if shr(val, 12) == 0 then return shl(val, 10) end
  -- Shifted form: only bits 12..23 are set.
  if band(val, 0xff000fff) == 0 then return shr(val, 2) + 0x00400000 end
  werror("out of range immediate `"..expr.."'")
end
i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end + local b = table.concat(t) + b = b..(r64 and (inv and "1" or "0"):rep(32) or b) + local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)") + if p0 then + local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a + if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then + local s = band(-2*w, 0x3f) - 1 + if w == 64 then s = s + 0x1000 end + if inv then + return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10) + else + return shl(w-#p0, 16) + shl(s+#p1, 10) + end + end + end + werror("out of range immediate `"..imm.."'") + elseif r64 then + waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) + actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) + return 0 + else + waction("IMM13W", 0, imm) + return 0 + end +end + +local function parse_imm6(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if n >= 0 and n <= 63 then + return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM6", 0, imm) + return 0 + end +end + +local function parse_imm_load(imm, scale) + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n and m >= 0 and m < 0x1000 then + return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset. + elseif n >= -256 and n < 256 then + return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset. 
+ end + werror("out of range immediate `"..imm.."'") + else + waction("IMML", 0, imm) + return 0 + end +end + +local function parse_fpimm(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m, e = math.frexp(n) + local s, e2 = 0, band(e-2, 7) + if m < 0 then m = -m; s = 0x00100000 end + m = m*32-16 + if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then + return s + shl(e2, 17) + shl(m, 13) + end + werror("out of range immediate `"..imm.."'") + else + werror("NYI fpimm action") + end +end + +local function parse_shift(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) +end + +local function parse_lslx16(expr) + local n = match(expr, "^lsl%s*#(%d+)$") + n = tonumber(n) + if not n then werror("expected shift operand") end + if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then + werror("bad shift amount") + end + return shl(n, 17) +end + +local function parse_extend(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + if s == "lsl" then + s = parse_reg_type == "x" and 3 or 2 + else + s = map_extend[s] + end + if not s then werror("expected extend operand") end + return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) +end + +local function parse_cond(expr, inv) + local c = map_cond[expr] + if not c then werror("expected condition operand") end + return shl(bit.bxor(c, inv), 12) +end + +local function parse_load(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local pn, p2 = params[n], params[n+1] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMML", 0, format(tp.ctypefmt, tailr)) + return 
op + base + end + end + end + werror("expected address operand") + end + local scale = shr(op, 30) + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 + elseif wb == "!" then + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if not p1a then werror("bad use of '!'") end + op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + parse_reg_base(p1a) + if p2a ~= "" then + local imm = match(p2a, "^,%s*#(.*)$") + if imm then + op = op + parse_imm_load(imm, scale) + else + local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") + op = op + shl(parse_reg(p2b), 16) + 0x00200800 + if parse_reg_type ~= "x" and parse_reg_type ~= "w" then + werror("bad index register type") + end + if p3b == "" then + if parse_reg_type ~= "x" then werror("bad index register type") end + op = op + 0x6000 + else + if p3s == "" or p3s == "#0" then + elseif p3s == "#"..scale then + op = op + 0x1000 + else + werror("bad scale") + end + if parse_reg_type == "x" then + if p3b == "lsl" and p3s ~= "" then op = op + 0x6000 + elseif p3b == "sxtx" then op = op + 0xe000 + else + werror("bad extend/shift specifier") + end + else + if p3b == "uxtw" then op = op + 0x4000 + elseif p3b == "sxtw" then op = op + 0xc000 + else + werror("bad extend/shift specifier") + end + end + end + end + else + if wb == "!" 
then werror("bad use of '!'") end + op = op + 0x01000000 + end + end + return op +end + +local function parse_load_pair(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local pn, p2 = params[n], params[n+1] + local scale = shr(op, 30) == 0 and 2 or 3 + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) + return op + base + 0x01000000 + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + 0x00800000 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if p1a then p1, p2 = p1a, p2a else p2 = "#0" end + op = op + (wb == "!" and 0x01800000 or 0x01000000) + end + return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true) +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +local function branch_type(op) + if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL + elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or + band(op, 0x3b000000) == 0x18000000 then + return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal + elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ + elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR + elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP + else + assert(false, "unknown branch type") + end +end + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +local function alias_bfx(p) + p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" +end + +local function alias_bfiz(p) + parse_reg(p[1]) + if parse_reg_type == "w" then + p[3] = "#-("..p[3]:sub(2)..")%32" + p[4] = "#("..p[4]:sub(2)..")-1" + else + p[3] = "#-("..p[3]:sub(2)..")%64" + p[4] = "#("..p[4]:sub(2)..")-1" + end +end + +local alias_lslimm = op_alias("ubfm_4", function(p) + parse_reg(p[1]) + local sh = p[3]:sub(2) + if parse_reg_type == "w" then + p[3] = "#-("..sh..")%32" + p[4] = "#31-("..sh..")" + else + p[3] = "#-("..sh..")%64" + p[4] = "#63-("..sh..")" + end +end) + +-- Template strings for ARM instructions. +map_op = { + -- Basic data processing instructions. 
+ add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx", + add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX", + adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx", + adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX", + cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx", + cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX", + + sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx", + sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX", + subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx", + subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX", + cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx", + cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX", + + neg_2 = "4b0003e0DMg", + neg_3 = "4b0003e0DMSg", + negs_2 = "6b0003e0DMg", + negs_3 = "6b0003e0DMSg", + + adc_3 = "1a000000DNMg", + adcs_3 = "3a000000DNMg", + sbc_3 = "5a000000DNMg", + sbcs_3 = "7a000000DNMg", + ngc_2 = "5a0003e0DMg", + ngcs_2 = "7a0003e0DMg", + + and_3 = "0a000000DNMg|12000000pDNig", + and_4 = "0a000000DNMSg", + orr_3 = "2a000000DNMg|32000000pDNig", + orr_4 = "2a000000DNMSg", + eor_3 = "4a000000DNMg|52000000pDNig", + eor_4 = "4a000000DNMSg", + ands_3 = "6a000000DNMg|72000000DNig", + ands_4 = "6a000000DNMSg", + tst_2 = "6a00001fNMg|7200001fNig", + tst_3 = "6a00001fNMSg", + + bic_3 = "0a200000DNMg", + bic_4 = "0a200000DNMSg", + orn_3 = "2a200000DNMg", + orn_4 = "2a200000DNMSg", + eon_3 = "4a200000DNMg", + eon_4 = "4a200000DNMSg", + bics_3 = "6a200000DNMg", + bics_4 = "6a200000DNMSg", + + movn_2 = "12800000DWg", + movn_3 = "12800000DWRg", + movz_2 = "52800000DWg", + movz_3 = "52800000DWRg", + movk_2 = "72800000DWg", + movk_3 = "72800000DWRg", + + -- TODO: this doesn't cover all valid immediates for mov reg, #imm. 
+ mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", + mov_3 = "2a0003e0DMSg", + mvn_2 = "2a2003e0DMg", + mvn_3 = "2a2003e0DMSg", + + adr_2 = "10000000DBx", + adrp_2 = "90000000DBx", + + csel_4 = "1a800000DNMCg", + csinc_4 = "1a800400DNMCg", + csinv_4 = "5a800000DNMCg", + csneg_4 = "5a800400DNMCg", + cset_2 = "1a9f07e0Dcg", + csetm_2 = "5a9f03e0Dcg", + cinc_3 = "1a800400DNmcg", + cinv_3 = "5a800000DNmcg", + cneg_3 = "5a800400DNmcg", + + ccmn_4 = "3a400000NMVCg|3a400800N5VCg", + ccmp_4 = "7a400000NMVCg|7a400800N5VCg", + + madd_4 = "1b000000DNMAg", + msub_4 = "1b008000DNMAg", + mul_3 = "1b007c00DNMg", + mneg_3 = "1b00fc00DNMg", + + smaddl_4 = "9b200000DxNMwAx", + smsubl_4 = "9b208000DxNMwAx", + smull_3 = "9b207c00DxNMw", + smnegl_3 = "9b20fc00DxNMw", + smulh_3 = "9b407c00DNMx", + umaddl_4 = "9ba00000DxNMwAx", + umsubl_4 = "9ba08000DxNMwAx", + umull_3 = "9ba07c00DxNMw", + umnegl_3 = "9ba0fc00DxNMw", + umulh_3 = "9bc07c00DNMx", + + udiv_3 = "1ac00800DNMg", + sdiv_3 = "1ac00c00DNMg", + + -- Bit operations. 
+ sbfm_4 = "13000000DN12w|93400000DN12x", + bfm_4 = "33000000DN12w|b3400000DN12x", + ubfm_4 = "53000000DN12w|d3400000DN12x", + extr_4 = "13800000DNM2w|93c00000DNM2x", + + sxtb_2 = "13001c00DNw|93401c00DNx", + sxth_2 = "13003c00DNw|93403c00DNx", + sxtw_2 = "93407c00DxNw", + uxtb_2 = "53001c00DNw", + uxth_2 = "53003c00DNw", + + sbfx_4 = op_alias("sbfm_4", alias_bfx), + bfxil_4 = op_alias("bfm_4", alias_bfx), + ubfx_4 = op_alias("ubfm_4", alias_bfx), + sbfiz_4 = op_alias("sbfm_4", alias_bfiz), + bfi_4 = op_alias("bfm_4", alias_bfiz), + ubfiz_4 = op_alias("ubfm_4", alias_bfiz), + + lsl_3 = function(params, nparams) + if params and params[3]:byte() == 35 then + return alias_lslimm(params, nparams) + else + return op_template(params, "1ac02000DNMg", nparams) + end + end, + lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", + asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", + ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", + + clz_2 = "5ac01000DNg", + cls_2 = "5ac01400DNg", + rbit_2 = "5ac00000DNg", + rev_2 = "5ac00800DNw|dac00c00DNx", + rev16_2 = "5ac00400DNg", + rev32_2 = "dac00800DNx", + + -- Loads and stores. + ["strb_*"] = "38000000DwL", + ["ldrb_*"] = "38400000DwL", + ["ldrsb_*"] = "38c00000DwL|38800000DxL", + ["strh_*"] = "78000000DwL", + ["ldrh_*"] = "78400000DwL", + ["ldrsh_*"] = "78c00000DwL|78800000DxL", + ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", + ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", + ["ldrsw_*"] = "98000000DxB|b8800000DxL", + -- NOTE: ldur etc. are handled by ldr et al. + + ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", + ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", + ["ldpsw_*"] = "68400000DAxP", + + -- Branches. + b_1 = "14000000B", + bl_1 = "94000000B", + blr_1 = "d63f0000Nx", + br_1 = "d61f0000Nx", + ret_0 = "d65f03c0", + ret_1 = "d65f0000Nx", + -- b.cond is added below. 
+ cbz_2 = "34000000DBg", + cbnz_2 = "35000000DBg", + tbz_3 = "36000000DTBw|36000000DTBx", + tbnz_3 = "37000000DTBw|37000000DTBx", + + -- Miscellaneous instructions. + -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr + -- TODO: sys, sysl, ic, dc, at, tlbi + -- TODO: hint, yield, wfe, wfi, sev, sevl + -- TODO: clrex, dsb, dmb, isb + nop_0 = "d503201f", + brk_0 = "d4200000", + brk_1 = "d4200000W", + + -- Floating point instructions. + fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", + fabs_2 = "1e20c000DNf", + fneg_2 = "1e214000DNf", + fsqrt_2 = "1e21c000DNf", + + fcvt_2 = "1e22c000DdNs|1e624000DsNd", + + -- TODO: half-precision and fixed-point conversions. + fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", + fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", + fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", + fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", + fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", + fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", + fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", + fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", + fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", + fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", + + scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", + ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", + + frintn_2 = "1e244000DNf", + frintp_2 = "1e24c000DNf", + frintm_2 = "1e254000DNf", + frintz_2 = "1e25c000DNf", + frinta_2 = "1e264000DNf", + frintx_2 = "1e274000DNf", + frinti_2 = "1e27c000DNf", + + fadd_3 = "1e202800DNMf", + fsub_3 = "1e203800DNMf", + fmul_3 = "1e200800DNMf", + fnmul_3 = "1e208800DNMf", + fdiv_3 = "1e201800DNMf", + + fmadd_4 = "1f000000DNMAf", + fmsub_4 = "1f008000DNMAf", + fnmadd_4 = "1f200000DNMAf", + fnmsub_4 = "1f208000DNMAf", + + fmax_3 = 
"1e204800DNMf", + fmaxnm_3 = "1e206800DNMf", + fmin_3 = "1e205800DNMf", + fminnm_3 = "1e207800DNMf", + + fcmp_2 = "1e202000NMf|1e202008NZf", + fcmpe_2 = "1e202010NMf|1e202018NZf", + + fccmp_4 = "1e200400NMVCf", + fccmpe_4 = "1e200410NMVCf", + + fcsel_4 = "1e200c00DNMCf", + + -- TODO: crc32*, aes*, sha*, pmull + -- TODO: SIMD instructions. +} + +for cond,c in pairs(map_cond) do + map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + local rtt = {} + + parse_reg_type = false + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + local q = params[n] + if p == "D" then + op = op + parse_reg(q); n = n + 1 + elseif p == "N" then + op = op + shl(parse_reg(q), 5); n = n + 1 + elseif p == "M" then + op = op + shl(parse_reg(q), 16); n = n + 1 + elseif p == "A" then + op = op + shl(parse_reg(q), 10); n = n + 1 + elseif p == "m" then + op = op + shl(parse_reg(params[n-1]), 16) + + elseif p == "p" then + if q == "sp" then params[n] = "@x31" end + elseif p == "g" then + if parse_reg_type == "x" then + op = op + 0x80000000 + elseif parse_reg_type ~= "w" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "f" then + if parse_reg_type == "d" then + op = op + 0x00400000 + elseif parse_reg_type ~= "s" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "x" or p == "w" or p == "d" or p == "s" then + if parse_reg_type ~= p then + werror("register size mismatch") + end + parse_reg_type = false + + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "P" then + op = parse_load_pair(params, nparams, n, op) + + elseif p == "B" then + local mode, v, s = parse_label(q, false); n = n + 1 + local m = branch_type(op) + 
waction("REL_"..mode, v+m, s, 1) + + elseif p == "I" then + op = op + parse_imm12(q); n = n + 1 + elseif p == "i" then + op = op + parse_imm13(q); n = n + 1 + elseif p == "W" then + op = op + parse_imm(q, 16, 5, 0, false); n = n + 1 + elseif p == "T" then + op = op + parse_imm6(q); n = n + 1 + elseif p == "1" then + op = op + parse_imm(q, 6, 16, 0, false); n = n + 1 + elseif p == "2" then + op = op + parse_imm(q, 6, 10, 0, false); n = n + 1 + elseif p == "5" then + op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(q, 4, 0, 0, false); n = n + 1 + elseif p == "F" then + op = op + parse_fpimm(q); n = n + 1 + elseif p == "Z" then + if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end + n = n + 1 + + elseif p == "S" then + op = op + parse_shift(q); n = n + 1 + elseif p == "X" then + op = op + parse_extend(q); n = n + 1 + elseif p == "R" then + op = op + parse_lslx16(q); n = n + 1 + elseif p == "C" then + op = op + parse_cond(q, 0); n = n + 1 + elseif p == "c" then + op = op + parse_cond(q, 1); n = n + 1 + + else + assert(false) + end + end + wputpos(pos, op) +end + +function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 3 positions. 
+ if secpos+3 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + end + error(err, 0) +end + +map_op[".template__"] = op_template + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). 
-- Define a label at the current output position.
--   params: { label } where label is "[1-9]", "->global" or "=>pcexpr";
--           nil requests the operand template string for listings.
-- Emits a LABEL_<mode> action that consumes one section buffer position.
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  -- Make sure there is room for the one position this action needs.
  if secpos+1 > maxsecpos then wflush() end
  local mode, n, s = parse_label(params[1], true)
  -- Extern labels can only be referenced, never defined.
  if mode == "EXT" then werror("bad label definition") end
  waction("LABEL_"..mode, n, s, 1)
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
--   params: list of numeric literals, each written as one word via wputw();
--           nil requests the operand template string.
-- Raises (via werror) on any operand that tonumber() cannot convert.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _,p in ipairs(params) do
    local n = tonumber(p)
    if not n then werror("bad immediate `"..p.."'") end
    -- Negative values are biased by 2^32 before being written.
    if n < 0 then n = n + 2^32 end
    wputw(n)
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- Alignment pseudo-opcode.
--   params: { align } where align must be a power of two in 2 ... 256;
--           nil requests the operand template string.
-- Emits an ALIGN action carrying align-1; anything else is "bad alignment".
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    -- Halve up to 8 times; hitting exactly 1 proves align == 2^i, i in 1..8.
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
--   params:  { name, ctype [, reg] }; nil requests the operand template,
--            which depends on nparams (2 or 3).
-- Registers the type in map_type, adds a "#name" -> sizeof(ctype) define to
-- map_archdef and emits a DtN(_V) shortcut #define into the output.
-- Raises (via werror) on a malformed or duplicate type name.
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  -- Commit the new type number only after everything above succeeded.
  ctypenum = num
end
-- The two-operand form (no default register) shares the same handler.
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
-- Writes one row per entry of map_type (name, C type, optional register),
-- sorted by type name, to the writer `out`. `lvl` is accepted but unused here.
local function dumptypes(out, lvl)
  local t = {}
  for name in pairs(map_type) do t[#t+1] = name end
  sort(t)
  out:write("Type definitions:\n")
  for _,name in ipairs(t) do
    local tp = map_type[name]
    local reg = tp.reg or ""
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
-- Emits a SECTION action for section number `num` and flushes immediately.
function _M.section(num)
  waction("SECTION", num)
  wflush(true) -- SECTION is a terminal action.
end

------------------------------------------------------------------------------

-- Dump architecture description.
-- Writes the arch/version/release banner from _info, then the action list.
function _M.dumparch(out)
  out:write(format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release))
  dumpactions(out)
end

-- Dump all user defined elements.
-- Order: types, then globals, then externs.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Pass callbacks from/to the DynASM core.
-- Stores the four callbacks supplied by the caller (wline, werror, wfatal,
-- wwarn) and hands back this module's wflush.
function _M.passcb(wl, we, wf, ww)
  wline, werror, wfatal, wwarn = wl, we, wf, ww
  return wflush
end

-- Setup the arch-specific module.
-- Records the architecture name and option table for later use.
function _M.setup(arch, opt)
  g_arch, g_opt = arch, opt
end

-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/lib/LuaJIT/dynasm/dasm_mips.h b/lib/LuaJIT/dynasm/dasm_mips.h new file mode 100644 index 0000000..71a835b --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_mips.h @@ -0,0 +1,420 @@ +/* +** DynASM MIPS encoding engine. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "mips" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). 
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. 
*/ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. 
*/ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: case DASM_IMMS: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. 
No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMS: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n); + if (ins & 2048) + n = n - (int)((char *)cp - base); + else + n = (n + (int)(size_t)base) & 0x0fffffff; + patchrel: + CK((n & 3) == 0 && + ((n + ((ins & 2048) ? 0x00020000 : 0)) >> + ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); + cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/lib/LuaJIT/dynasm/dasm_mips.lua b/lib/LuaJIT/dynasm/dasm_mips.lua new file mode 100644 index 0000000..bd2a2b4 --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_mips.lua @@ -0,0 +1,1008 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS32/MIPS64 module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +local mips64 = mips64 + +-- Module information: +local _info = { + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", + version = "1.4.0", + vernum = 10400, + release = "2016-05-24", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! 
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMMS",
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number.
-- Numbers are zero-based: the first entry of action_names maps to 0.
local map_action = {}
for n,name in ipairs(action_names) do
  map_action[name] = n-1
end

-- Action list buffer.
local actlist = {}

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Dump action names and numbers.
--   out: output stream providing a write() method.
-- Each action is listed with its number in hex and in decimal.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
--   out:  output stream providing a write() method.
--   name: C identifier used for the generated array.
-- An empty action list is still written as a one-element array holding
-- map_action.STOP, so the generated declaration never has zero elements.
local function writeactions(out, name)
  local words, nn = actlist, #actlist
  if nn == 0 then
    -- Use a private one-element list for the placeholder. Storing it at
    -- actlist[0] (as was done before) left index 1 empty, so the final
    -- tohex(actlist[nn]) below was called with nil and raised an error.
    words, nn = { map_action.STOP }, 1
  end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  -- All words but the last are followed by a comma.
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(words[i]), ",\n"))
  end
  assert(out:write("0x", tohex(words[nn]), "\n};\n\n"))
end

------------------------------------------------------------------------------

-- Add word to action list.
--   n: non-negative integer that fits into 32 bits; anything else trips
--      the "word out of range" assertion.
local function wputxw(n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
local function waction(action, val, a, num)
  -- Encoded action word: 0xff in the top byte, the action number in the
  -- next byte and the optional value in the low 16 bits.
  local code = assert(map_action[action], "bad action name `"..action.."'")
  local word = 0xff000000 + code * 0x10000 + (val or 0)
  wputxw(word)
  if a then
    -- An argument expression is forwarded to the next dasm_put() call and
    -- occupies buffer positions (one unless num says otherwise).
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    -- No argument, but the action still consumes buffer positions.
    secpos = secpos + num
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- Unless term is set, a STOP action is appended before the pending
-- dasm_put() call is written out.
local function wflush(term)
  if #actlist ~= actargs[1] then -- Otherwise there is nothing to flush.
    if not term then waction("STOP") end -- Terminate action list.
    local arglist = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", arglist), true)
    -- The next dasm_put() starts at the current end of the action list and
    -- that offset itself takes up the first buffer position.
    actargs = { #actlist }
    secpos = 1
  end
end

-- Put escaped word.
-- Words that would collide with the action encoding (top byte 0xff) are
-- preceded by an ESC action.
local function wputw(n)
  local needs_escape = n >= 0xff000000
  if needs_escape then waction("ESC") end
  wputxw(n)
end

-- Reserve position for word.
-- Returns the index of the reserved slot, which holds an empty string until
-- it is filled in.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end

-- Store word to reserved position.
local function wputpos(pos, n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[pos] = n
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbering starts at 20 and may not exceed 2047.
local next_global = 20
local map_global = setmetatable({}, { __index = function(tab, label)
  if not match(label, "^[%a_][%w_]*$") then werror("bad global label") end
  local num = next_global
  if num > 2047 then werror("too many global labels") end
  next_global = num + 1
  tab[label] = num -- Cache it, so later lookups bypass this handler.
  return num
end})

-- Dump global labels.
local function dumpglobals(out, lvl)
  -- Invert the name -> number map to list the labels in numeric order.
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("Global labels:\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(format(" %s\n", names[num]))
  end
  out:write("\n")
end

-- Write global label enum.
local function writeglobals(out, prefix)
  -- Invert the name -> number map to emit the labels in numeric order.
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("enum {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" ", prefix, names[num], ",\n")
  end
  -- The closing enumerator <prefix>_MAX follows the last label.
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
-- Emits a C string array called `name`, terminated by a null pointer.
local function writeglobalnames(out, name)
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Numbering starts at 0 and may not exceed 2047. map_extern_ holds the
-- reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, { __index = function(tab, label)
  -- No restrictions on the name for now.
  local num = next_extern
  if num > 2047 then werror("too many extern labels") end
  next_extern = num + 1
  tab[label] = num
  map_extern_[num] = label
  return num
end})

-- Dump extern labels.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local last = next_extern - 1
  for num = 0, last do
    out:write(format(" %s\n", map_extern_[num]))
  end
  out:write("\n")
end

-- Write extern label names.
-- Emits a C string array called `name`, terminated by a null pointer.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_extern - 1
  for num = 0, last do
    out:write(" \"", map_extern_[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
-- Ext. register name -> int. name.
local map_archdef = {}
map_archdef.sp = "r29"
map_archdef.ra = "r31"

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers.
function _M.revdef(s)
  -- Inverse of map_archdef: r29 -> sp, r31 -> ra. Any other name is
  -- returned unchanged.
  if s == "r29" then return "sp"
  elseif s == "r31" then return "ra" end
  return s
end

------------------------------------------------------------------------------

-- Template strings for MIPS instructions.
--
-- Each template starts with 8 hex digits giving the base instruction word,
-- followed by one letter per operand. The letters are interpreted by the
-- ".template__" handler further down in this file. Letters used in this part
-- of the table:
--   S, T  general purpose register, placed at bit 21 / bit 16
--   H     floating-point register, placed at bit 16
--   I, U  signed / unsigned 16 bit immediate
--   O     displacement operand (handled by parse_disp)
--   B, J  branch / jump label
-- In the entries below the numeric key suffix equals the number of operand
-- letters. Entries written as `mips64 and "..."` only hold a template when
-- the mips64 flag is set.
local map_op = {
  -- First-level opcodes.
  j_1 =         "08000000J",
  jal_1 =       "0c000000J",
  b_1 =         "10000000B",
  beqz_2 =      "10000000SB",
  beq_3 =       "10000000STB",
  bnez_2 =      "14000000SB",
  bne_3 =       "14000000STB",
  blez_2 =      "18000000SB",
  bgtz_2 =      "1c000000SB",
  addi_3 =      "20000000TSI",
  li_2 =        "24000000TI",
  addiu_3 =     "24000000TSI",
  slti_3 =      "28000000TSI",
  sltiu_3 =     "2c000000TSI",
  andi_3 =      "30000000TSU",
  lu_2 =        "34000000TU",
  ori_3 =       "34000000TSU",
  xori_3 =      "38000000TSU",
  lui_2 =       "3c000000TU",
  beqzl_2 =     "50000000SB",
  beql_3 =      "50000000STB",
  bnezl_2 =     "54000000SB",
  bnel_3 =      "54000000STB",
  blezl_2 =     "58000000SB",
  bgtzl_2 =     "5c000000SB",
  daddi_3 =     mips64 and "60000000TSI",
  daddiu_3 =    mips64 and "64000000TSI",
  ldl_2 =       mips64 and "68000000TO",
  ldr_2 =       mips64 and "6c000000TO",
  lb_2 =        "80000000TO",
  lh_2 =        "84000000TO",
  lwl_2 =       "88000000TO",
  lw_2 =        "8c000000TO",
  lbu_2 =       "90000000TO",
  lhu_2 =       "94000000TO",
  lwr_2 =       "98000000TO",
  lwu_2 =       mips64 and "9c000000TO",
  sb_2 =        "a0000000TO",
  sh_2 =        "a4000000TO",
  swl_2 =       "a8000000TO",
  sw_2 =        "ac000000TO",
  sdl_2 =       mips64 and "b0000000TO",
  -- SDR is primary opcode 0x2d, i.e. 0x2d << 26 = 0xb4000000. The previous
  -- value b1000000 was the SDL opcode with a stray bit in the base
  -- register field.
  sdr_2 =       mips64 and "b4000000TO",
  swr_2 =       "b8000000TO",
  cache_2 =     "bc000000NO",
  ll_2 =        "c0000000TO",
  lwc1_2 =      "c4000000HO",
  pref_2 =      "cc000000NO",
  ldc1_2 =      "d4000000HO",
  ld_2 =        mips64 and "dc000000TO",
  sc_2 =        "e0000000TO",
  swc1_2 =      "e4000000HO",
  scd_2 =       mips64 and "f0000000TO",
  sdc1_2 =      "f4000000HO",
  sd_2 =        mips64 and "fc000000TO",

  -- Opcode SPECIAL.
+ nop_0 = "00000000", + sll_3 = "00000000DTA", + sextw_2 = "00000000DT", + movf_2 = "00000001DS", + movf_3 = "00000001DSC", + movt_2 = "00010001DS", + movt_3 = "00010001DSC", + srl_3 = "00000002DTA", + rotr_3 = "00200002DTA", + sra_3 = "00000003DTA", + sllv_3 = "00000004DTS", + srlv_3 = "00000006DTS", + rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", + srav_3 = "00000007DTS", + jr_1 = "00000008S", + jalr_1 = "0000f809S", + jalr_2 = "00000009DS", + movz_3 = "0000000aDST", + movn_3 = "0000000bDST", + syscall_0 = "0000000c", + syscall_1 = "0000000cY", + break_0 = "0000000d", + break_1 = "0000000dY", + sync_0 = "0000000f", + mfhi_1 = "00000010D", + mthi_1 = "00000011S", + mflo_1 = "00000012D", + mtlo_1 = "00000013S", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", + mult_2 = "00000018ST", + multu_2 = "00000019ST", + div_2 = "0000001aST", + divu_2 = "0000001bST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + ddiv_2 = mips64 and "0000001eST", + ddivu_2 = mips64 and "0000001fST", + add_3 = "00000020DST", + move_2 = mips64 and "00000025DS" or "00000021DS", + addu_3 = "00000021DST", + sub_3 = "00000022DST", + negu_2 = mips64 and "0000002fDT" or "00000023DT", + subu_3 = "00000023DST", + and_3 = "00000024DST", + or_3 = "00000025DST", + xor_3 = "00000026DST", + not_2 = "00000027DS", + nor_3 = "00000027DST", + slt_3 = "0000002aDST", + sltu_3 = "0000002bDST", + dadd_3 = mips64 and "0000002cDST", + daddu_3 = mips64 and "0000002dDST", + dsub_3 = mips64 and "0000002eDST", + dsubu_3 = mips64 and "0000002fDST", + tge_2 = "00000030ST", + tge_3 = "00000030STZ", + tgeu_2 = "00000031ST", + tgeu_3 = "00000031STZ", + tlt_2 = "00000032ST", + tlt_3 = "00000032STZ", + tltu_2 = "00000033ST", + tltu_3 = "00000033STZ", + teq_2 = "00000034ST", + teq_3 = "00000034STZ", + tne_2 = "00000036ST", + tne_3 = "00000036STZ", + dsll_3 = mips64 and "00000038DTa", + dsrl_3 = mips64 and 
"0000003aDTa", + drotr_3 = mips64 and "0020003aDTa", + dsra_3 = mips64 and "0000003bDTa", + dsll32_3 = mips64 and "0000003cDTA", + dsrl32_3 = mips64 and "0000003eDTA", + drotr32_3 = mips64 and "0020003eDTA", + dsra32_3 = mips64 and "0000003fDTA", + + -- Opcode REGIMM. + bltz_2 = "04000000SB", + bgez_2 = "04010000SB", + bltzl_2 = "04020000SB", + bgezl_2 = "04030000SB", + tgei_2 = "04080000SI", + tgeiu_2 = "04090000SI", + tlti_2 = "040a0000SI", + tltiu_2 = "040b0000SI", + teqi_2 = "040c0000SI", + tnei_2 = "040e0000SI", + bltzal_2 = "04100000SB", + bal_1 = "04110000B", + bgezal_2 = "04110000SB", + bltzall_2 = "04120000SB", + bgezall_2 = "04130000SB", + synci_1 = "041f0000O", + + -- Opcode SPECIAL2. + madd_2 = "70000000ST", + maddu_2 = "70000001ST", + mul_3 = "70000002DST", + msub_2 = "70000004ST", + msubu_2 = "70000005ST", + clz_2 = "70000020DS=", + clo_2 = "70000021DS=", + dclz_2 = mips64 and "70000024DS=", + dclo_2 = mips64 and "70000025DS=", + sdbbp_0 = "7000003f", + sdbbp_1 = "7000003fY", + + -- Opcode SPECIAL3. + ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 + dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 + dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 + zextw_2 = mips64 and "7c00f803TS", + ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 + dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 + dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 + dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 + wsbh_2 = "7c0000a0DT", + dsbh_2 = mips64 and "7c0000a4DT", + dshd_2 = mips64 and "7c000164DT", + seb_2 = "7c000420DT", + seh_2 = "7c000620DT", + rdhwr_2 = "7c00003bTD", + + -- Opcode COP0. 
+ mfc0_2 = "40000000TD", + mfc0_3 = "40000000TDW", + dmfc0_2 = mips64 and "40200000TD", + dmfc0_3 = mips64 and "40200000TDW", + mtc0_2 = "40800000TD", + mtc0_3 = "40800000TDW", + dmtc0_2 = mips64 and "40a00000TD", + dmtc0_3 = mips64 and "40a00000TDW", + rdpgpr_2 = "41400000DT", + di_0 = "41606000", + di_1 = "41606000T", + ei_0 = "41606020", + ei_1 = "41606020T", + wrpgpr_2 = "41c00000DT", + tlbr_0 = "42000001", + tlbwi_0 = "42000002", + tlbwr_0 = "42000006", + tlbp_0 = "42000008", + eret_0 = "42000018", + deret_0 = "4200001f", + wait_0 = "42000020", + + -- Opcode COP1. + mfc1_2 = "44000000TG", + dmfc1_2 = mips64 and "44200000TG", + cfc1_2 = "44400000TG", + mfhc1_2 = "44600000TG", + mtc1_2 = "44800000TG", + dmtc1_2 = mips64 and "44a00000TG", + ctc1_2 = "44c00000TG", + mthc1_2 = "44e00000TG", + + bc1f_1 = "45000000B", + bc1f_2 = "45000000CB", + bc1t_1 = "45010000B", + bc1t_2 = "45010000CB", + bc1fl_1 = "45020000B", + bc1fl_2 = "45020000CB", + bc1tl_1 = "45030000B", + bc1tl_2 = "45030000CB", + + ["add.s_3"] = "46000000FGH", + ["sub.s_3"] = "46000001FGH", + ["mul.s_3"] = "46000002FGH", + ["div.s_3"] = "46000003FGH", + ["sqrt.s_2"] = "46000004FG", + ["abs.s_2"] = "46000005FG", + ["mov.s_2"] = "46000006FG", + ["neg.s_2"] = "46000007FG", + ["round.l.s_2"] = "46000008FG", + ["trunc.l.s_2"] = "46000009FG", + ["ceil.l.s_2"] = "4600000aFG", + ["floor.l.s_2"] = "4600000bFG", + ["round.w.s_2"] = "4600000cFG", + ["trunc.w.s_2"] = "4600000dFG", + ["ceil.w.s_2"] = "4600000eFG", + ["floor.w.s_2"] = "4600000fFG", + ["movf.s_2"] = "46000011FG", + ["movf.s_3"] = "46000011FGC", + ["movt.s_2"] = "46010011FG", + ["movt.s_3"] = "46010011FGC", + ["movz.s_3"] = "46000012FGT", + ["movn.s_3"] = "46000013FGT", + ["recip.s_2"] = "46000015FG", + ["rsqrt.s_2"] = "46000016FG", + ["cvt.d.s_2"] = "46000021FG", + ["cvt.w.s_2"] = "46000024FG", + ["cvt.l.s_2"] = "46000025FG", + ["cvt.ps.s_3"] = "46000026FGH", + ["c.f.s_2"] = "46000030GH", + ["c.f.s_3"] = "46000030VGH", + ["c.un.s_2"] = "46000031GH", + 
["c.un.s_3"] = "46000031VGH", + ["c.eq.s_2"] = "46000032GH", + ["c.eq.s_3"] = "46000032VGH", + ["c.ueq.s_2"] = "46000033GH", + ["c.ueq.s_3"] = "46000033VGH", + ["c.olt.s_2"] = "46000034GH", + ["c.olt.s_3"] = "46000034VGH", + ["c.ult.s_2"] = "46000035GH", + ["c.ult.s_3"] = "46000035VGH", + ["c.ole.s_2"] = "46000036GH", + ["c.ole.s_3"] = "46000036VGH", + ["c.ule.s_2"] = "46000037GH", + ["c.ule.s_3"] = "46000037VGH", + ["c.sf.s_2"] = "46000038GH", + ["c.sf.s_3"] = "46000038VGH", + ["c.ngle.s_2"] = "46000039GH", + ["c.ngle.s_3"] = "46000039VGH", + ["c.seq.s_2"] = "4600003aGH", + ["c.seq.s_3"] = "4600003aVGH", + ["c.ngl.s_2"] = "4600003bGH", + ["c.ngl.s_3"] = "4600003bVGH", + ["c.lt.s_2"] = "4600003cGH", + ["c.lt.s_3"] = "4600003cVGH", + ["c.nge.s_2"] = "4600003dGH", + ["c.nge.s_3"] = "4600003dVGH", + ["c.le.s_2"] = "4600003eGH", + ["c.le.s_3"] = "4600003eVGH", + ["c.ngt.s_2"] = "4600003fGH", + ["c.ngt.s_3"] = "4600003fVGH", + + ["add.d_3"] = "46200000FGH", + ["sub.d_3"] = "46200001FGH", + ["mul.d_3"] = "46200002FGH", + ["div.d_3"] = "46200003FGH", + ["sqrt.d_2"] = "46200004FG", + ["abs.d_2"] = "46200005FG", + ["mov.d_2"] = "46200006FG", + ["neg.d_2"] = "46200007FG", + ["round.l.d_2"] = "46200008FG", + ["trunc.l.d_2"] = "46200009FG", + ["ceil.l.d_2"] = "4620000aFG", + ["floor.l.d_2"] = "4620000bFG", + ["round.w.d_2"] = "4620000cFG", + ["trunc.w.d_2"] = "4620000dFG", + ["ceil.w.d_2"] = "4620000eFG", + ["floor.w.d_2"] = "4620000fFG", + ["movf.d_2"] = "46200011FG", + ["movf.d_3"] = "46200011FGC", + ["movt.d_2"] = "46210011FG", + ["movt.d_3"] = "46210011FGC", + ["movz.d_3"] = "46200012FGT", + ["movn.d_3"] = "46200013FGT", + ["recip.d_2"] = "46200015FG", + ["rsqrt.d_2"] = "46200016FG", + ["cvt.s.d_2"] = "46200020FG", + ["cvt.w.d_2"] = "46200024FG", + ["cvt.l.d_2"] = "46200025FG", + ["c.f.d_2"] = "46200030GH", + ["c.f.d_3"] = "46200030VGH", + ["c.un.d_2"] = "46200031GH", + ["c.un.d_3"] = "46200031VGH", + ["c.eq.d_2"] = "46200032GH", + ["c.eq.d_3"] = "46200032VGH", + 
["c.ueq.d_2"] = "46200033GH", + ["c.ueq.d_3"] = "46200033VGH", + ["c.olt.d_2"] = "46200034GH", + ["c.olt.d_3"] = "46200034VGH", + ["c.ult.d_2"] = "46200035GH", + ["c.ult.d_3"] = "46200035VGH", + ["c.ole.d_2"] = "46200036GH", + ["c.ole.d_3"] = "46200036VGH", + ["c.ule.d_2"] = "46200037GH", + ["c.ule.d_3"] = "46200037VGH", + ["c.sf.d_2"] = "46200038GH", + ["c.sf.d_3"] = "46200038VGH", + ["c.ngle.d_2"] = "46200039GH", + ["c.ngle.d_3"] = "46200039VGH", + ["c.seq.d_2"] = "4620003aGH", + ["c.seq.d_3"] = "4620003aVGH", + ["c.ngl.d_2"] = "4620003bGH", + ["c.ngl.d_3"] = "4620003bVGH", + ["c.lt.d_2"] = "4620003cGH", + ["c.lt.d_3"] = "4620003cVGH", + ["c.nge.d_2"] = "4620003dGH", + ["c.nge.d_3"] = "4620003dVGH", + ["c.le.d_2"] = "4620003eGH", + ["c.le.d_3"] = "4620003eVGH", + ["c.ngt.d_2"] = "4620003fGH", + ["c.ngt.d_3"] = "4620003fVGH", + + ["add.ps_3"] = "46c00000FGH", + ["sub.ps_3"] = "46c00001FGH", + ["mul.ps_3"] = "46c00002FGH", + ["abs.ps_2"] = "46c00005FG", + ["mov.ps_2"] = "46c00006FG", + ["neg.ps_2"] = "46c00007FG", + ["movf.ps_2"] = "46c00011FG", + ["movf.ps_3"] = "46c00011FGC", + ["movt.ps_2"] = "46c10011FG", + ["movt.ps_3"] = "46c10011FGC", + ["movz.ps_3"] = "46c00012FGT", + ["movn.ps_3"] = "46c00013FGT", + ["cvt.s.pu_2"] = "46c00020FG", + ["cvt.s.pl_2"] = "46c00028FG", + ["pll.ps_3"] = "46c0002cFGH", + ["plu.ps_3"] = "46c0002dFGH", + ["pul.ps_3"] = "46c0002eFGH", + ["puu.ps_3"] = "46c0002fFGH", + ["c.f.ps_2"] = "46c00030GH", + ["c.f.ps_3"] = "46c00030VGH", + ["c.un.ps_2"] = "46c00031GH", + ["c.un.ps_3"] = "46c00031VGH", + ["c.eq.ps_2"] = "46c00032GH", + ["c.eq.ps_3"] = "46c00032VGH", + ["c.ueq.ps_2"] = "46c00033GH", + ["c.ueq.ps_3"] = "46c00033VGH", + ["c.olt.ps_2"] = "46c00034GH", + ["c.olt.ps_3"] = "46c00034VGH", + ["c.ult.ps_2"] = "46c00035GH", + ["c.ult.ps_3"] = "46c00035VGH", + ["c.ole.ps_2"] = "46c00036GH", + ["c.ole.ps_3"] = "46c00036VGH", + ["c.ule.ps_2"] = "46c00037GH", + ["c.ule.ps_3"] = "46c00037VGH", + ["c.sf.ps_2"] = "46c00038GH", + ["c.sf.ps_3"] = 
"46c00038VGH", + ["c.ngle.ps_2"] = "46c00039GH", + ["c.ngle.ps_3"] = "46c00039VGH", + ["c.seq.ps_2"] = "46c0003aGH", + ["c.seq.ps_3"] = "46c0003aVGH", + ["c.ngl.ps_2"] = "46c0003bGH", + ["c.ngl.ps_3"] = "46c0003bVGH", + ["c.lt.ps_2"] = "46c0003cGH", + ["c.lt.ps_3"] = "46c0003cVGH", + ["c.nge.ps_2"] = "46c0003dGH", + ["c.nge.ps_3"] = "46c0003dVGH", + ["c.le.ps_2"] = "46c0003eGH", + ["c.le.ps_3"] = "46c0003eVGH", + ["c.ngt.ps_2"] = "46c0003fGH", + ["c.ngt.ps_3"] = "46c0003fVGH", + + ["cvt.s.w_2"] = "46800020FG", + ["cvt.d.w_2"] = "46800021FG", + + ["cvt.s.l_2"] = "46a00020FG", + ["cvt.d.l_2"] = "46a00021FG", + + -- Opcode COP1X. + lwxc1_2 = "4c000000FX", + ldxc1_2 = "4c000001FX", + luxc1_2 = "4c000005FX", + swxc1_2 = "4c000008FX", + sdxc1_2 = "4c000009FX", + suxc1_2 = "4c00000dFX", + prefx_2 = "4c00000fMX", + ["alnv.ps_4"] = "4c00001eFGHS", + ["madd.s_4"] = "4c000020FRGH", + ["madd.d_4"] = "4c000021FRGH", + ["madd.ps_4"] = "4c000026FRGH", + ["msub.s_4"] = "4c000028FRGH", + ["msub.d_4"] = "4c000029FRGH", + ["msub.ps_4"] = "4c00002eFRGH", + ["nmadd.s_4"] = "4c000030FRGH", + ["nmadd.d_4"] = "4c000031FRGH", + ["nmadd.ps_4"] = "4c000036FRGH", + ["nmsub.s_4"] = "4c000038FRGH", + ["nmsub.d_4"] = "4c000039FRGH", + ["nmsub.ps_4"] = "4c00003eFRGH", +} + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local 
function parse_imm(imm, bits, shift, scale, signed, action) + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[rf]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) + return 0 + end +end + +local function parse_disp(disp) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = shl(parse_gpr(reg), 21) + local extname = match(imm, "^extern%s+(%S+)$") + if extname then + waction("REL_EXT", map_extern[extname], nil, 1) + return r + else + return r + parse_imm(imm, 16, 0, 0, true) + end + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if tp then + waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) + return shl(r, 21) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_index(idx) + local rt, rs = match(idx, "^(.*)%(([%w_:]+)%)$") + if rt then + rt = parse_gpr(rt) + rs = parse_gpr(rs) + return shl(rt, 16) + shl(rs, 21) + end + werror("bad index `"..idx.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, 
"^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 2 positions (ins/ext). + if secpos+2 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "D" then + op = op + shl(parse_gpr(params[n]), 11); n = n + 1 + elseif p == "T" then + op = op + shl(parse_gpr(params[n]), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(params[n]), 21); n = n + 1 + elseif p == "F" then + op = op + shl(parse_fpr(params[n]), 6); n = n + 1 + elseif p == "G" then + op = op + shl(parse_fpr(params[n]), 11); n = n + 1 + elseif p == "H" then + op = op + shl(parse_fpr(params[n]), 16); n = n + 1 + elseif p == "R" then + op = op + shl(parse_fpr(params[n]), 21); n = n + 1 + elseif p == "I" then + op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 + elseif p == "U" then + op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 + elseif p == "O" then + op = op + parse_disp(params[n]); n = n + 1 + elseif p == "X" then + op = op + parse_index(params[n]); n = n + 1 + elseif p == "B" or p == "J" then + local mode, m, s = parse_label(params[n], false) + if p == "B" then m = m + 2048 end + waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "A" then + op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 + elseif p == "a" 
then + local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 + op = op + band(m, 0x7c0) + band(shr(m, 9), 4) + elseif p == "M" then + op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 + elseif p == "N" then + op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 + elseif p == "C" then + op = op + parse_imm(params[n], 3, 18, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(params[n], 3, 8, 0, false); n = n + 1 + elseif p == "W" then + op = op + parse_imm(params[n], 3, 0, 0, false); n = n + 1 + elseif p == "Y" then + op = op + parse_imm(params[n], 20, 6, 0, false); n = n + 1 + elseif p == "Z" then + op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 + elseif p == "=" then + op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. + else + assert(false) + end + end + wputpos(pos, op) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. 
map_op[".externnames_1"] = function(params)
  -- Called without params to query the operand syntax.
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  -- Hand a writer callback to wline; it receives the output stream later.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
-- params[1] is the label text; it is parsed as a label *definition*.
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  -- A label needs one section buffer position; flush first if it won't fit.
  if secpos+1 > maxsecpos then wflush() end
  local mode, n, s = parse_label(params[1], true)
  if mode == "EXT" then werror("bad label definition") end
  waction("LABEL_"..mode, n, s, 1)
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
-- Emits every operand as one 32 bit word. Operands must be numeric literals.
-- Negative values are stored in two's complement form. Anything that is not
-- an integer fitting into 32 bits is rejected with a regular error message
-- (same wording as parse_imm) instead of being passed on to the word writer.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _,p in ipairs(params) do
    local n = tonumber(p)
    if not n then werror("bad immediate `"..p.."'") end
    if n < 0 then n = n + 2^32 end -- Map -2^32 <= n < 0 to an unsigned word.
    -- Written as a positive test so that NaN fails it, too.
    if not (n >= 0 and n <= 0xffffffff and n % 1 == 0) then
      werror("out of range immediate `"..p.."'")
    end
    wputw(n)
    -- Keep room for the next word (up to 2 positions) in the current put.
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- Alignment pseudo-opcode.
-- params[1] must be a power of two between 2 and 256 (inclusive).
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    -- Halve up to 8 times; hitting exactly 1 proves align is 2^1 .. 2^8.
    for _ = 1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- params = { name, ctype [, reg] }. Registers a named C type so that
-- operands can refer to it, and emits a Dt<N>() helper macro for it.
map_op[".type_3"] = function(params, nparams)
  if not params then
    -- Syntax query: the 2-operand alias has no register operand.
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end

  local tname, ctype, reg = params[1], params[2], params[3]
  if not match(tname, "^[%a_][%w_]*$") then
    werror("bad type name `"..tname.."'")
  end
  if map_type[tname] then
    werror("duplicate type `"..tname.."'")
  end

  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..tname] = "sizeof("..ctype..")"

  -- Add new type and emit shortcut define.
  local idx = ctypenum + 1
  local entry = {}
  entry.ctype = ctype
  entry.ctypefmt = format("Dt%X(%%s)", idx)
  entry.reg = reg
  map_type[tname] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", idx, ctype))
  -- The counter is only advanced after the define has been emitted.
  ctypenum = idx
end
-- The two-operand form (no default register) shares the implementation.
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
-- Writes one line per defined type, sorted by type name.
local function dumptypes(out, lvl)
  local names = {}
  for tname in pairs(map_type) do names[#names+1] = tname end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local tname = names[i]
    local tp = map_type[tname]
    -- Types without a default register print an empty register column.
    out:write(format(" %-20s %-20s %s\n", tname, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
function _M.section(num)
  waction("SECTION", num)
  -- SECTION is a terminal action, so flush without appending a STOP.
  wflush(true)
end

------------------------------------------------------------------------------

-- Dump architecture description.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end

-- Dump all user defined elements.
-- Order: types, then global labels, then extern labels.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/lib/LuaJIT/dynasm/dasm_mips64.lua b/lib/LuaJIT/dynasm/dasm_mips64.lua new file mode 100644 index 0000000..5636b23 --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_mips64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS64 module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +mips64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_mips") diff --git a/lib/LuaJIT/dynasm/dasm_ppc.h b/lib/LuaJIT/dynasm/dasm_ppc.h new file mode 100644 index 0000000..83fc030 --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_ppc.h @@ -0,0 +1,420 @@ +/* +** DynASM PPC/PPC64 encoding engine. +** Copyright (C) 2005-2017 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "ppc" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. 
*/ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. 
*/ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. 
*/ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMSH: + CK((n >> 6) == 0, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMSH: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. 
No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4; + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); + patchrel: + CK((n & 3) == 0 && + (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> + ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); + cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMMSH: + cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/lib/LuaJIT/dynasm/dasm_ppc.lua b/lib/LuaJIT/dynasm/dasm_ppc.lua new file mode 100644 index 0000000..216f925 --- /dev/null +++ b/lib/LuaJIT/dynasm/dasm_ppc.lua @@ -0,0 +1,1919 @@ +------------------------------------------------------------------------------ +-- DynASM PPC/PPC64 module. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +-- +-- Support for various extensions contributed by Caio Souza Oliveira. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "ppc", + description = "DynASM PPC module", + version = "1.4.0", + vernum = 10400, + release = "2015-10-18", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! 
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMMSH"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number.
-- Numbers are zero-based positions in action_names (STOP = 0).
local map_action = {}
for n,name in ipairs(action_names) do
  map_action[name] = n-1
end

-- Action list buffer.
local actlist = {}

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Dump action names and numbers.
-- Each action is listed with its number in hex and in decimal.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
--   out:  output stream (needs a write method).
--   name: C identifier for the emitted array.
-- An empty action list is emitted as a one-element array holding STOP.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  -- Empty list: substitute a lone STOP word for the final element. This
  -- must not go through actlist[0], since the code below never reads index 0
  -- and tohex() would be handed nil. The action list itself stays untouched.
  if nn == 0 then nn, last = 1, map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The final element gets no trailing comma and closes the initializer.
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end

------------------------------------------------------------------------------

-- Add word to action list.
-- n must be an integer in the range 0 .. 0xffffffff.
local function wputxw(n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
--   action: action name (must be a key of map_action).
--   val:    optional value added into the low bits of the action word.
--   a:      optional argument appended to the next dasm_put() call.
--   num:    optional number of section buffer positions consumed.
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  -- The action number goes into the upper halfword of the action word.
  wputxw(code * 0x10000 + (val or 0))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1) -- An argument uses one position by default.
  elseif num then
    secpos = secpos + num
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- Pass a true value for term if the last action already terminates the list.
local function wflush(term)
  if #actlist ~= actargs[1] then -- Otherwise there is nothing to flush.
    if not term then waction("STOP") end -- Terminate action list.
    wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
    actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
    secpos = 1 -- The actionlist offset occupies a buffer position, too.
  end
end

-- Put escaped word.
-- Words up to 0xffffff are preceded by an ESC action.
local function wputw(n)
  if n <= 0xffffff then
    waction("ESC")
  end
  wputxw(n)
end

-- Reserve position for word.
-- Returns the action list index that was reserved.
local function wpos()
  local idx = #actlist + 1
  actlist[idx] = "" -- Placeholder until wputpos() stores the real word.
  return idx
end

-- Store word to reserved position.
-- n must be an integer in the range 0 .. 0xffffffff.
local function wputpos(pos, n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[pos] = n
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbering starts at 20 and may not exceed 2047.
local next_global = 20
local map_global = setmetatable({}, {
  __index = function(t, name)
    if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
    local num = next_global
    if num > 2047 then werror("too many global labels") end
    next_global = num + 1
    t[name] = num -- Cache the number, so __index only fires once per name.
    return num
  end,
})

-- Dump global labels.
-- Labels are listed in the order of their assigned numbers.
local function dumpglobals(out, lvl)
  local names = {}
  for gname, num in pairs(map_global) do names[num] = gname end
  out:write("Global labels:\n")
  for num = 20, next_global-1 do
    out:write(format(" %s\n", names[num]))
  end
  out:write("\n")
end

-- Write global label enum.
-- Each enumerator is the label name with the given prefix; the enum is
-- closed with a <prefix>_MAX entry.
local function writeglobals(out, prefix)
  local names = {}
  for gname, num in pairs(map_global) do names[num] = gname end
  out:write("enum {\n")
  for num = 20, next_global-1 do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
-- Emits a C string array called `name`, terminated by a null pointer.
local function writeglobalnames(out, name)
  local names = {}
  for gname, num in pairs(map_global) do names[num] = gname end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 20, next_global-1 do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Numbering starts at 0 and may not exceed 2047. map_extern_ holds the
-- reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, {
  __index = function(t, name)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then werror("too many extern labels") end
    next_extern = num + 1
    t[name] = num
    map_extern_[num] = name
    return num
  end,
})

-- Dump extern labels.
-- Labels are listed in the order of their assigned numbers.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local num = 0
  while num < next_extern do
    out:write(format(" %s\n", map_extern_[num]))
    num = num + 1
  end
  out:write("\n")
end

-- Write extern label names.
-- Emits a C string array called `name`, terminated by a null pointer.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local num = 0
  while num < next_extern do
    out:write(" \"", map_extern_[num], "\",\n")
    num = num + 1
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = { sp = "r1" } -- Ext. register name -> int. name.

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers.
+function _M.revdef(s) + if s == "r1" then return "sp" end + return s +end + +local map_cond = { + lt = 0, gt = 1, eq = 2, so = 3, + ge = 4, le = 5, ne = 6, ns = 7, +} + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +-- Template strings for PPC instructions. +map_op = { + tdi_3 = "08000000ARI", + twi_3 = "0c000000ARI", + mulli_3 = "1c000000RRI", + subfic_3 = "20000000RRI", + cmplwi_3 = "28000000XRU", + cmplwi_2 = "28000000-RU", + cmpldi_3 = "28200000XRU", + cmpldi_2 = "28200000-RU", + cmpwi_3 = "2c000000XRI", + cmpwi_2 = "2c000000-RI", + cmpdi_3 = "2c200000XRI", + cmpdi_2 = "2c200000-RI", + addic_3 = "30000000RRI", + ["addic._3"] = "34000000RRI", + addi_3 = "38000000RR0I", + li_2 = "38000000RI", + la_2 = "38000000RD", + addis_3 = "3c000000RR0I", + lis_2 = "3c000000RI", + lus_2 = "3c000000RU", + bc_3 = "40000000AAK", + bcl_3 = "40000001AAK", + bdnz_1 = "42000000K", + bdz_1 = "42400000K", + sc_0 = "44000000", + b_1 = "48000000J", + bl_1 = "48000001J", + rlwimi_5 = "50000000RR~AAA.", + rlwinm_5 = "54000000RR~AAA.", + rlwnm_5 = "5c000000RR~RAA.", + ori_3 = "60000000RR~U", + nop_0 = "60000000", + oris_3 = "64000000RR~U", + xori_3 = "68000000RR~U", + xoris_3 = "6c000000RR~U", + ["andi._3"] = "70000000RR~U", + ["andis._3"] = "74000000RR~U", + lwz_2 = "80000000RD", + lwzu_2 = "84000000RD", + lbz_2 = "88000000RD", + lbzu_2 = "8c000000RD", + stw_2 = "90000000RD", + stwu_2 = "94000000RD", + stb_2 = "98000000RD", + stbu_2 = "9c000000RD", + lhz_2 = "a0000000RD", + lhzu_2 = "a4000000RD", + lha_2 = "a8000000RD", + lhau_2 = "ac000000RD", + sth_2 = "b0000000RD", + sthu_2 = "b4000000RD", + lmw_2 = "b8000000RD", + stmw_2 = "bc000000RD", + lfs_2 = "c0000000FD", + lfsu_2 = "c4000000FD", + lfd_2 = "c8000000FD", + 
lfdu_2 = "cc000000FD", + stfs_2 = "d0000000FD", + stfsu_2 = "d4000000FD", + stfd_2 = "d8000000FD", + stfdu_2 = "dc000000FD", + ld_2 = "e8000000RD", -- NYI: displacement must be divisible by 4. + ldu_2 = "e8000001RD", + lwa_2 = "e8000002RD", + std_2 = "f8000000RD", + stdu_2 = "f8000001RD", + + subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end), + subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end), + subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end), + ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end), + + rotlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + rotrwi_3 = op_alias("rlwinm_5", function(p) + p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31" + end), + rotlw_3 = op_alias("rlwnm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + slwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[4] = "0" + end), + srwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31" + end), + clrlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "0"; p[5] = "31" + end), + clrrwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0" + end), + + -- Primary opcode 4: + mulhhwu_3 = "10000010RRR.", + machhwu_3 = "10000018RRR.", + mulhhw_3 = "10000050RRR.", + nmachhw_3 = "1000005cRRR.", + machhwsu_3 = "10000098RRR.", + machhws_3 = "100000d8RRR.", + nmachhws_3 = "100000dcRRR.", + mulchwu_3 = "10000110RRR.", + macchwu_3 = "10000118RRR.", + mulchw_3 = "10000150RRR.", + macchw_3 = "10000158RRR.", + nmacchw_3 = "1000015cRRR.", + macchwsu_3 = "10000198RRR.", + macchws_3 = "100001d8RRR.", + nmacchws_3 = "100001dcRRR.", + mullhw_3 = "10000350RRR.", + maclhw_3 = "10000358RRR.", + nmaclhw_3 = "1000035cRRR.", + maclhwsu_3 = "10000398RRR.", + maclhws_3 = "100003d8RRR.", + nmaclhws_3 = "100003dcRRR.", + machhwuo_3 = "10000418RRR.", + nmachhwo_3 = "1000045cRRR.", + machhwsuo_3 = "10000498RRR.", + 
machhwso_3 = "100004d8RRR.", + nmachhwso_3 = "100004dcRRR.", + macchwuo_3 = "10000518RRR.", + macchwo_3 = "10000558RRR.", + nmacchwo_3 = "1000055cRRR.", + macchwsuo_3 = "10000598RRR.", + macchwso_3 = "100005d8RRR.", + nmacchwso_3 = "100005dcRRR.", + maclhwo_3 = "10000758RRR.", + nmaclhwo_3 = "1000075cRRR.", + maclhwsuo_3 = "10000798RRR.", + maclhwso_3 = "100007d8RRR.", + nmaclhwso_3 = "100007dcRRR.", + + vaddubm_3 = "10000000VVV", + vmaxub_3 = "10000002VVV", + vrlb_3 = "10000004VVV", + vcmpequb_3 = "10000006VVV", + vmuloub_3 = "10000008VVV", + vaddfp_3 = "1000000aVVV", + vmrghb_3 = "1000000cVVV", + vpkuhum_3 = "1000000eVVV", + vmhaddshs_4 = "10000020VVVV", + vmhraddshs_4 = "10000021VVVV", + vmladduhm_4 = "10000022VVVV", + vmsumubm_4 = "10000024VVVV", + vmsummbm_4 = "10000025VVVV", + vmsumuhm_4 = "10000026VVVV", + vmsumuhs_4 = "10000027VVVV", + vmsumshm_4 = "10000028VVVV", + vmsumshs_4 = "10000029VVVV", + vsel_4 = "1000002aVVVV", + vperm_4 = "1000002bVVVV", + vsldoi_4 = "1000002cVVVP", + vpermxor_4 = "1000002dVVVV", + vmaddfp_4 = "1000002eVVVV~", + vnmsubfp_4 = "1000002fVVVV~", + vaddeuqm_4 = "1000003cVVVV", + vaddecuq_4 = "1000003dVVVV", + vsubeuqm_4 = "1000003eVVVV", + vsubecuq_4 = "1000003fVVVV", + vadduhm_3 = "10000040VVV", + vmaxuh_3 = "10000042VVV", + vrlh_3 = "10000044VVV", + vcmpequh_3 = "10000046VVV", + vmulouh_3 = "10000048VVV", + vsubfp_3 = "1000004aVVV", + vmrghh_3 = "1000004cVVV", + vpkuwum_3 = "1000004eVVV", + vadduwm_3 = "10000080VVV", + vmaxuw_3 = "10000082VVV", + vrlw_3 = "10000084VVV", + vcmpequw_3 = "10000086VVV", + vmulouw_3 = "10000088VVV", + vmuluwm_3 = "10000089VVV", + vmrghw_3 = "1000008cVVV", + vpkuhus_3 = "1000008eVVV", + vaddudm_3 = "100000c0VVV", + vmaxud_3 = "100000c2VVV", + vrld_3 = "100000c4VVV", + vcmpeqfp_3 = "100000c6VVV", + vcmpequd_3 = "100000c7VVV", + vpkuwus_3 = "100000ceVVV", + vadduqm_3 = "10000100VVV", + vmaxsb_3 = "10000102VVV", + vslb_3 = "10000104VVV", + vmulosb_3 = "10000108VVV", + vrefp_2 = "1000010aV-V", + vmrglb_3 = 
"1000010cVVV", + vpkshus_3 = "1000010eVVV", + vaddcuq_3 = "10000140VVV", + vmaxsh_3 = "10000142VVV", + vslh_3 = "10000144VVV", + vmulosh_3 = "10000148VVV", + vrsqrtefp_2 = "1000014aV-V", + vmrglh_3 = "1000014cVVV", + vpkswus_3 = "1000014eVVV", + vaddcuw_3 = "10000180VVV", + vmaxsw_3 = "10000182VVV", + vslw_3 = "10000184VVV", + vmulosw_3 = "10000188VVV", + vexptefp_2 = "1000018aV-V", + vmrglw_3 = "1000018cVVV", + vpkshss_3 = "1000018eVVV", + vmaxsd_3 = "100001c2VVV", + vsl_3 = "100001c4VVV", + vcmpgefp_3 = "100001c6VVV", + vlogefp_2 = "100001caV-V", + vpkswss_3 = "100001ceVVV", + vadduhs_3 = "10000240VVV", + vminuh_3 = "10000242VVV", + vsrh_3 = "10000244VVV", + vcmpgtuh_3 = "10000246VVV", + vmuleuh_3 = "10000248VVV", + vrfiz_2 = "1000024aV-V", + vsplth_3 = "1000024cVV3", + vupkhsh_2 = "1000024eV-V", + vminuw_3 = "10000282VVV", + vminud_3 = "100002c2VVV", + vcmpgtud_3 = "100002c7VVV", + vrfim_2 = "100002caV-V", + vcmpgtsb_3 = "10000306VVV", + vcfux_3 = "1000030aVVA~", + vaddshs_3 = "10000340VVV", + vminsh_3 = "10000342VVV", + vsrah_3 = "10000344VVV", + vcmpgtsh_3 = "10000346VVV", + vmulesh_3 = "10000348VVV", + vcfsx_3 = "1000034aVVA~", + vspltish_2 = "1000034cVS", + vupkhpx_2 = "1000034eV-V", + vaddsws_3 = "10000380VVV", + vminsw_3 = "10000382VVV", + vsraw_3 = "10000384VVV", + vcmpgtsw_3 = "10000386VVV", + vmulesw_3 = "10000388VVV", + vctuxs_3 = "1000038aVVA~", + vspltisw_2 = "1000038cVS", + vminsd_3 = "100003c2VVV", + vsrad_3 = "100003c4VVV", + vcmpbfp_3 = "100003c6VVV", + vcmpgtsd_3 = "100003c7VVV", + vctsxs_3 = "100003caVVA~", + vupklpx_2 = "100003ceV-V", + vsububm_3 = "10000400VVV", + ["bcdadd._4"] = "10000401VVVy.", + vavgub_3 = "10000402VVV", + vand_3 = "10000404VVV", + ["vcmpequb._3"] = "10000406VVV", + vmaxfp_3 = "1000040aVVV", + vsubuhm_3 = "10000440VVV", + ["bcdsub._4"] = "10000441VVVy.", + vavguh_3 = "10000442VVV", + vandc_3 = "10000444VVV", + ["vcmpequh._3"] = "10000446VVV", + vminfp_3 = "1000044aVVV", + vpkudum_3 = "1000044eVVV", + vsubuwm_3 = 
"10000480VVV", + vavguw_3 = "10000482VVV", + vor_3 = "10000484VVV", + ["vcmpequw._3"] = "10000486VVV", + vpmsumw_3 = "10000488VVV", + ["vcmpeqfp._3"] = "100004c6VVV", + ["vcmpequd._3"] = "100004c7VVV", + vpkudus_3 = "100004ceVVV", + vavgsb_3 = "10000502VVV", + vavgsh_3 = "10000542VVV", + vorc_3 = "10000544VVV", + vbpermq_3 = "1000054cVVV", + vpksdus_3 = "1000054eVVV", + vavgsw_3 = "10000582VVV", + vsld_3 = "100005c4VVV", + ["vcmpgefp._3"] = "100005c6VVV", + vpksdss_3 = "100005ceVVV", + vsububs_3 = "10000600VVV", + mfvscr_1 = "10000604V--", + vsum4ubs_3 = "10000608VVV", + vsubuhs_3 = "10000640VVV", + mtvscr_1 = "10000644--V", + ["vcmpgtuh._3"] = "10000646VVV", + vsum4shs_3 = "10000648VVV", + vupkhsw_2 = "1000064eV-V", + vsubuws_3 = "10000680VVV", + vshasigmaw_4 = "10000682VVYp", + veqv_3 = "10000684VVV", + vsum2sws_3 = "10000688VVV", + vmrgow_3 = "1000068cVVV", + vshasigmad_4 = "100006c2VVYp", + vsrd_3 = "100006c4VVV", + ["vcmpgtud._3"] = "100006c7VVV", + vupklsw_2 = "100006ceV-V", + vupkslw_2 = "100006ceV-V", + vsubsbs_3 = "10000700VVV", + vclzb_2 = "10000702V-V", + vpopcntb_2 = "10000703V-V", + ["vcmpgtsb._3"] = "10000706VVV", + vsum4sbs_3 = "10000708VVV", + vsubshs_3 = "10000740VVV", + vclzh_2 = "10000742V-V", + vpopcnth_2 = "10000743V-V", + ["vcmpgtsh._3"] = "10000746VVV", + vsubsws_3 = "10000780VVV", + vclzw_2 = "10000782V-V", + vpopcntw_2 = "10000783V-V", + ["vcmpgtsw._3"] = "10000786VVV", + vsumsws_3 = "10000788VVV", + vmrgew_3 = "1000078cVVV", + vclzd_2 = "100007c2V-V", + vpopcntd_2 = "100007c3V-V", + ["vcmpbfp._3"] = "100007c6VVV", + ["vcmpgtsd._3"] = "100007c7VVV", + + -- Primary opcode 19: + mcrf_2 = "4c000000XX", + isync_0 = "4c00012c", + crnor_3 = "4c000042CCC", + crnot_2 = "4c000042CC=", + crandc_3 = "4c000102CCC", + crxor_3 = "4c000182CCC", + crclr_1 = "4c000182C==", + crnand_3 = "4c0001c2CCC", + crand_3 = "4c000202CCC", + creqv_3 = "4c000242CCC", + crset_1 = "4c000242C==", + crorc_3 = "4c000342CCC", + cror_3 = "4c000382CCC", + crmove_2 = 
"4c000382CC=", + bclr_2 = "4c000020AA", + bclrl_2 = "4c000021AA", + bcctr_2 = "4c000420AA", + bcctrl_2 = "4c000421AA", + bctar_2 = "4c000460AA", + bctarl_2 = "4c000461AA", + blr_0 = "4e800020", + blrl_0 = "4e800021", + bctr_0 = "4e800420", + bctrl_0 = "4e800421", + + -- Primary opcode 31: + cmpw_3 = "7c000000XRR", + cmpw_2 = "7c000000-RR", + cmpd_3 = "7c200000XRR", + cmpd_2 = "7c200000-RR", + tw_3 = "7c000008ARR", + lvsl_3 = "7c00000cVRR", + subfc_3 = "7c000010RRR.", + subc_3 = "7c000010RRR~.", + mulhdu_3 = "7c000012RRR.", + addc_3 = "7c000014RRR.", + mulhwu_3 = "7c000016RRR.", + isel_4 = "7c00001eRRRC", + isellt_3 = "7c00001eRRR", + iselgt_3 = "7c00005eRRR", + iseleq_3 = "7c00009eRRR", + mfcr_1 = "7c000026R", + mfocrf_2 = "7c100026RG", + mtcrf_2 = "7c000120GR", + mtocrf_2 = "7c100120GR", + lwarx_3 = "7c000028RR0R", + ldx_3 = "7c00002aRR0R", + lwzx_3 = "7c00002eRR0R", + slw_3 = "7c000030RR~R.", + cntlzw_2 = "7c000034RR~", + sld_3 = "7c000036RR~R.", + and_3 = "7c000038RR~R.", + cmplw_3 = "7c000040XRR", + cmplw_2 = "7c000040-RR", + cmpld_3 = "7c200040XRR", + cmpld_2 = "7c200040-RR", + lvsr_3 = "7c00004cVRR", + subf_3 = "7c000050RRR.", + sub_3 = "7c000050RRR~.", + lbarx_3 = "7c000068RR0R", + ldux_3 = "7c00006aRR0R", + dcbst_2 = "7c00006c-RR", + lwzux_3 = "7c00006eRR0R", + cntlzd_2 = "7c000074RR~", + andc_3 = "7c000078RR~R.", + td_3 = "7c000088ARR", + lvewx_3 = "7c00008eVRR", + mulhd_3 = "7c000092RRR.", + addg6s_3 = "7c000094RRR", + mulhw_3 = "7c000096RRR.", + dlmzb_3 = "7c00009cRR~R.", + ldarx_3 = "7c0000a8RR0R", + dcbf_2 = "7c0000ac-RR", + lbzx_3 = "7c0000aeRR0R", + lvx_3 = "7c0000ceVRR", + neg_2 = "7c0000d0RR.", + lharx_3 = "7c0000e8RR0R", + lbzux_3 = "7c0000eeRR0R", + popcntb_2 = "7c0000f4RR~", + not_2 = "7c0000f8RR~%.", + nor_3 = "7c0000f8RR~R.", + stvebx_3 = "7c00010eVRR", + subfe_3 = "7c000110RRR.", + sube_3 = "7c000110RRR~.", + adde_3 = "7c000114RRR.", + stdx_3 = "7c00012aRR0R", + ["stwcx._3"] = "7c00012dRR0R.", + stwx_3 = "7c00012eRR0R", + prtyw_2 = 
"7c000134RR~", + stvehx_3 = "7c00014eVRR", + stdux_3 = "7c00016aRR0R", + ["stqcx._3"] = "7c00016dR:R0R.", + stwux_3 = "7c00016eRR0R", + prtyd_2 = "7c000174RR~", + stvewx_3 = "7c00018eVRR", + subfze_2 = "7c000190RR.", + addze_2 = "7c000194RR.", + ["stdcx._3"] = "7c0001adRR0R.", + stbx_3 = "7c0001aeRR0R", + stvx_3 = "7c0001ceVRR", + subfme_2 = "7c0001d0RR.", + mulld_3 = "7c0001d2RRR.", + addme_2 = "7c0001d4RR.", + mullw_3 = "7c0001d6RRR.", + dcbtst_2 = "7c0001ec-RR", + stbux_3 = "7c0001eeRR0R", + bpermd_3 = "7c0001f8RR~R", + lvepxl_3 = "7c00020eVRR", + add_3 = "7c000214RRR.", + lqarx_3 = "7c000228R:R0R", + dcbt_2 = "7c00022c-RR", + lhzx_3 = "7c00022eRR0R", + cdtbcd_2 = "7c000234RR~", + eqv_3 = "7c000238RR~R.", + lvepx_3 = "7c00024eVRR", + eciwx_3 = "7c00026cRR0R", + lhzux_3 = "7c00026eRR0R", + cbcdtd_2 = "7c000274RR~", + xor_3 = "7c000278RR~R.", + mfspefscr_1 = "7c0082a6R", + mfxer_1 = "7c0102a6R", + mflr_1 = "7c0802a6R", + mfctr_1 = "7c0902a6R", + lwax_3 = "7c0002aaRR0R", + lhax_3 = "7c0002aeRR0R", + mftb_1 = "7c0c42e6R", + mftbu_1 = "7c0d42e6R", + lvxl_3 = "7c0002ceVRR", + lwaux_3 = "7c0002eaRR0R", + lhaux_3 = "7c0002eeRR0R", + popcntw_2 = "7c0002f4RR~", + divdeu_3 = "7c000312RRR.", + divweu_3 = "7c000316RRR.", + sthx_3 = "7c00032eRR0R", + orc_3 = "7c000338RR~R.", + ecowx_3 = "7c00036cRR0R", + sthux_3 = "7c00036eRR0R", + or_3 = "7c000378RR~R.", + mr_2 = "7c000378RR~%.", + divdu_3 = "7c000392RRR.", + divwu_3 = "7c000396RRR.", + mtspefscr_1 = "7c0083a6R", + mtxer_1 = "7c0103a6R", + mtlr_1 = "7c0803a6R", + mtctr_1 = "7c0903a6R", + dcbi_2 = "7c0003ac-RR", + nand_3 = "7c0003b8RR~R.", + dsn_2 = "7c0003c6-RR", + stvxl_3 = "7c0003ceVRR", + divd_3 = "7c0003d2RRR.", + divw_3 = "7c0003d6RRR.", + popcntd_2 = "7c0003f4RR~", + cmpb_3 = "7c0003f8RR~R.", + mcrxr_1 = "7c000400X", + lbdx_3 = "7c000406RRR", + subfco_3 = "7c000410RRR.", + subco_3 = "7c000410RRR~.", + addco_3 = "7c000414RRR.", + ldbrx_3 = "7c000428RR0R", + lswx_3 = "7c00042aRR0R", + lwbrx_3 = "7c00042cRR0R", + lfsx_3 = 
"7c00042eFR0R", + srw_3 = "7c000430RR~R.", + srd_3 = "7c000436RR~R.", + lhdx_3 = "7c000446RRR", + subfo_3 = "7c000450RRR.", + subo_3 = "7c000450RRR~.", + lfsux_3 = "7c00046eFR0R", + lwdx_3 = "7c000486RRR", + lswi_3 = "7c0004aaRR0A", + sync_0 = "7c0004ac", + lwsync_0 = "7c2004ac", + ptesync_0 = "7c4004ac", + lfdx_3 = "7c0004aeFR0R", + lddx_3 = "7c0004c6RRR", + nego_2 = "7c0004d0RR.", + lfdux_3 = "7c0004eeFR0R", + stbdx_3 = "7c000506RRR", + subfeo_3 = "7c000510RRR.", + subeo_3 = "7c000510RRR~.", + addeo_3 = "7c000514RRR.", + stdbrx_3 = "7c000528RR0R", + stswx_3 = "7c00052aRR0R", + stwbrx_3 = "7c00052cRR0R", + stfsx_3 = "7c00052eFR0R", + sthdx_3 = "7c000546RRR", + ["stbcx._3"] = "7c00056dRRR", + stfsux_3 = "7c00056eFR0R", + stwdx_3 = "7c000586RRR", + subfzeo_2 = "7c000590RR.", + addzeo_2 = "7c000594RR.", + stswi_3 = "7c0005aaRR0A", + ["sthcx._3"] = "7c0005adRRR", + stfdx_3 = "7c0005aeFR0R", + stddx_3 = "7c0005c6RRR", + subfmeo_2 = "7c0005d0RR.", + mulldo_3 = "7c0005d2RRR.", + addmeo_2 = "7c0005d4RR.", + mullwo_3 = "7c0005d6RRR.", + dcba_2 = "7c0005ec-RR", + stfdux_3 = "7c0005eeFR0R", + stvepxl_3 = "7c00060eVRR", + addo_3 = "7c000614RRR.", + lhbrx_3 = "7c00062cRR0R", + lfdpx_3 = "7c00062eF:RR", + sraw_3 = "7c000630RR~R.", + srad_3 = "7c000634RR~R.", + lfddx_3 = "7c000646FRR", + stvepx_3 = "7c00064eVRR", + srawi_3 = "7c000670RR~A.", + sradi_3 = "7c000674RR~H.", + eieio_0 = "7c0006ac", + lfiwax_3 = "7c0006aeFR0R", + divdeuo_3 = "7c000712RRR.", + divweuo_3 = "7c000716RRR.", + sthbrx_3 = "7c00072cRR0R", + stfdpx_3 = "7c00072eF:RR", + extsh_2 = "7c000734RR~.", + stfddx_3 = "7c000746FRR", + divdeo_3 = "7c000752RRR.", + divweo_3 = "7c000756RRR.", + extsb_2 = "7c000774RR~.", + divduo_3 = "7c000792RRR.", + divwou_3 = "7c000796RRR.", + icbi_2 = "7c0007ac-RR", + stfiwx_3 = "7c0007aeFR0R", + extsw_2 = "7c0007b4RR~.", + divdo_3 = "7c0007d2RRR.", + divwo_3 = "7c0007d6RRR.", + dcbz_2 = "7c0007ec-RR", + + ["tbegin._1"] = "7c00051d1", + ["tbegin._0"] = "7c00051d", + ["tend._1"] = 
"7c00055dY", + ["tend._0"] = "7c00055d", + ["tendall._0"] = "7e00055d", + tcheck_1 = "7c00059cX", + ["tsr._1"] = "7c0005dd1", + ["tsuspend._0"] = "7c0005dd", + ["tresume._0"] = "7c2005dd", + ["tabortwc._3"] = "7c00061dARR", + ["tabortdc._3"] = "7c00065dARR", + ["tabortwci._3"] = "7c00069dARS", + ["tabortdci._3"] = "7c0006ddARS", + ["tabort._1"] = "7c00071d-R-", + ["treclaim._1"] = "7c00075d-R", + ["trechkpt._0"] = "7c0007dd", + + lxsiwzx_3 = "7c000018QRR", + lxsiwax_3 = "7c000098QRR", + mfvsrd_2 = "7c000066-Rq", + mfvsrwz_2 = "7c0000e6-Rq", + stxsiwx_3 = "7c000118QRR", + mtvsrd_2 = "7c000166QR", + mtvsrwa_2 = "7c0001a6QR", + lxvdsx_3 = "7c000298QRR", + lxsspx_3 = "7c000418QRR", + lxsdx_3 = "7c000498QRR", + stxsspx_3 = "7c000518QRR", + stxsdx_3 = "7c000598QRR", + lxvw4x_3 = "7c000618QRR", + lxvd2x_3 = "7c000698QRR", + stxvw4x_3 = "7c000718QRR", + stxvd2x_3 = "7c000798QRR", + + -- Primary opcode 30: + rldicl_4 = "78000000RR~HM.", + rldicr_4 = "78000004RR~HM.", + rldic_4 = "78000008RR~HM.", + rldimi_4 = "7800000cRR~HM.", + rldcl_4 = "78000010RR~RM.", + rldcr_4 = "78000012RR~RM.", + + rotldi_3 = op_alias("rldicl_4", function(p) + p[4] = "0" + end), + rotrdi_3 = op_alias("rldicl_4", function(p) + p[3] = "64-("..p[3]..")"; p[4] = "0" + end), + rotld_3 = op_alias("rldcl_4", function(p) + p[4] = "0" + end), + sldi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")" + end), + srdi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), + clrldi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "0" + end), + clrrdi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")"; p[3] = "0" + end), + + -- Primary opcode 56: + lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 57: + lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4. 
+ + -- Primary opcode 59: + fdivs_3 = "ec000024FFF.", + fsubs_3 = "ec000028FFF.", + fadds_3 = "ec00002aFFF.", + fsqrts_2 = "ec00002cF-F.", + fres_2 = "ec000030F-F.", + fmuls_3 = "ec000032FF-F.", + frsqrtes_2 = "ec000034F-F.", + fmsubs_4 = "ec000038FFFF~.", + fmadds_4 = "ec00003aFFFF~.", + fnmsubs_4 = "ec00003cFFFF~.", + fnmadds_4 = "ec00003eFFFF~.", + fcfids_2 = "ec00069cF-F.", + fcfidus_2 = "ec00079cF-F.", + + dadd_3 = "ec000004FFF.", + dqua_4 = "ec000006FFFZ.", + dmul_3 = "ec000044FFF.", + drrnd_4 = "ec000046FFFZ.", + dscli_3 = "ec000084FF6.", + dquai_4 = "ec000086SF~FZ.", + dscri_3 = "ec0000c4FF6.", + drintx_4 = "ec0000c61F~FZ.", + dcmpo_3 = "ec000104XFF", + dtstex_3 = "ec000144XFF", + dtstdc_3 = "ec000184XF6", + dtstdg_3 = "ec0001c4XF6", + drintn_4 = "ec0001c61F~FZ.", + dctdp_2 = "ec000204F-F.", + dctfix_2 = "ec000244F-F.", + ddedpd_3 = "ec000284ZF~F.", + dxex_2 = "ec0002c4F-F.", + dsub_3 = "ec000404FFF.", + ddiv_3 = "ec000444FFF.", + dcmpu_3 = "ec000504XFF", + dtstsf_3 = "ec000544XFF", + drsp_2 = "ec000604F-F.", + dcffix_2 = "ec000644F-F.", + denbcd_3 = "ec000684YF~F.", + diex_3 = "ec0006c4FFF.", + + -- Primary opcode 60: + xsaddsp_3 = "f0000000QQQ", + xsmaddasp_3 = "f0000008QQQ", + xxsldwi_4 = "f0000010QQQz", + xsrsqrtesp_2 = "f0000028Q-Q", + xssqrtsp_2 = "f000002cQ-Q", + xxsel_4 = "f0000030QQQQ", + xssubsp_3 = "f0000040QQQ", + xsmaddmsp_3 = "f0000048QQQ", + xxpermdi_4 = "f0000050QQQz", + xsresp_2 = "f0000068Q-Q", + xsmulsp_3 = "f0000080QQQ", + xsmsubasp_3 = "f0000088QQQ", + xxmrghw_3 = "f0000090QQQ", + xsdivsp_3 = "f00000c0QQQ", + xsmsubmsp_3 = "f00000c8QQQ", + xsadddp_3 = "f0000100QQQ", + xsmaddadp_3 = "f0000108QQQ", + xscmpudp_3 = "f0000118XQQ", + xscvdpuxws_2 = "f0000120Q-Q", + xsrdpi_2 = "f0000124Q-Q", + xsrsqrtedp_2 = "f0000128Q-Q", + xssqrtdp_2 = "f000012cQ-Q", + xssubdp_3 = "f0000140QQQ", + xsmaddmdp_3 = "f0000148QQQ", + xscmpodp_3 = "f0000158XQQ", + xscvdpsxws_2 = "f0000160Q-Q", + xsrdpiz_2 = "f0000164Q-Q", + xsredp_2 = "f0000168Q-Q", + xsmuldp_3 = 
"f0000180QQQ", + xsmsubadp_3 = "f0000188QQQ", + xxmrglw_3 = "f0000190QQQ", + xsrdpip_2 = "f00001a4Q-Q", + xstsqrtdp_2 = "f00001a8X-Q", + xsrdpic_2 = "f00001acQ-Q", + xsdivdp_3 = "f00001c0QQQ", + xsmsubmdp_3 = "f00001c8QQQ", + xsrdpim_2 = "f00001e4Q-Q", + xstdivdp_3 = "f00001e8XQQ", + xvaddsp_3 = "f0000200QQQ", + xvmaddasp_3 = "f0000208QQQ", + xvcmpeqsp_3 = "f0000218QQQ", + xvcvspuxws_2 = "f0000220Q-Q", + xvrspi_2 = "f0000224Q-Q", + xvrsqrtesp_2 = "f0000228Q-Q", + xvsqrtsp_2 = "f000022cQ-Q", + xvsubsp_3 = "f0000240QQQ", + xvmaddmsp_3 = "f0000248QQQ", + xvcmpgtsp_3 = "f0000258QQQ", + xvcvspsxws_2 = "f0000260Q-Q", + xvrspiz_2 = "f0000264Q-Q", + xvresp_2 = "f0000268Q-Q", + xvmulsp_3 = "f0000280QQQ", + xvmsubasp_3 = "f0000288QQQ", + xxspltw_3 = "f0000290QQg~", + xvcmpgesp_3 = "f0000298QQQ", + xvcvuxwsp_2 = "f00002a0Q-Q", + xvrspip_2 = "f00002a4Q-Q", + xvtsqrtsp_2 = "f00002a8X-Q", + xvrspic_2 = "f00002acQ-Q", + xvdivsp_3 = "f00002c0QQQ", + xvmsubmsp_3 = "f00002c8QQQ", + xvcvsxwsp_2 = "f00002e0Q-Q", + xvrspim_2 = "f00002e4Q-Q", + xvtdivsp_3 = "f00002e8XQQ", + xvadddp_3 = "f0000300QQQ", + xvmaddadp_3 = "f0000308QQQ", + xvcmpeqdp_3 = "f0000318QQQ", + xvcvdpuxws_2 = "f0000320Q-Q", + xvrdpi_2 = "f0000324Q-Q", + xvrsqrtedp_2 = "f0000328Q-Q", + xvsqrtdp_2 = "f000032cQ-Q", + xvsubdp_3 = "f0000340QQQ", + xvmaddmdp_3 = "f0000348QQQ", + xvcmpgtdp_3 = "f0000358QQQ", + xvcvdpsxws_2 = "f0000360Q-Q", + xvrdpiz_2 = "f0000364Q-Q", + xvredp_2 = "f0000368Q-Q", + xvmuldp_3 = "f0000380QQQ", + xvmsubadp_3 = "f0000388QQQ", + xvcmpgedp_3 = "f0000398QQQ", + xvcvuxwdp_2 = "f00003a0Q-Q", + xvrdpip_2 = "f00003a4Q-Q", + xvtsqrtdp_2 = "f00003a8X-Q", + xvrdpic_2 = "f00003acQ-Q", + xvdivdp_3 = "f00003c0QQQ", + xvmsubmdp_3 = "f00003c8QQQ", + xvcvsxwdp_2 = "f00003e0Q-Q", + xvrdpim_2 = "f00003e4Q-Q", + xvtdivdp_3 = "f00003e8XQQ", + xsnmaddasp_3 = "f0000408QQQ", + xxland_3 = "f0000410QQQ", + xscvdpsp_2 = "f0000424Q-Q", + xscvdpspn_2 = "f000042cQ-Q", + xsnmaddmsp_3 = "f0000448QQQ", + xxlandc_3 = 
"f0000450QQQ", + xsrsp_2 = "f0000464Q-Q", + xsnmsubasp_3 = "f0000488QQQ", + xxlor_3 = "f0000490QQQ", + xscvuxdsp_2 = "f00004a0Q-Q", + xsnmsubmsp_3 = "f00004c8QQQ", + xxlxor_3 = "f00004d0QQQ", + xscvsxdsp_2 = "f00004e0Q-Q", + xsmaxdp_3 = "f0000500QQQ", + xsnmaddadp_3 = "f0000508QQQ", + xxlnor_3 = "f0000510QQQ", + xscvdpuxds_2 = "f0000520Q-Q", + xscvspdp_2 = "f0000524Q-Q", + xscvspdpn_2 = "f000052cQ-Q", + xsmindp_3 = "f0000540QQQ", + xsnmaddmdp_3 = "f0000548QQQ", + xxlorc_3 = "f0000550QQQ", + xscvdpsxds_2 = "f0000560Q-Q", + xsabsdp_2 = "f0000564Q-Q", + xscpsgndp_3 = "f0000580QQQ", + xsnmsubadp_3 = "f0000588QQQ", + xxlnand_3 = "f0000590QQQ", + xscvuxddp_2 = "f00005a0Q-Q", + xsnabsdp_2 = "f00005a4Q-Q", + xsnmsubmdp_3 = "f00005c8QQQ", + xxleqv_3 = "f00005d0QQQ", + xscvsxddp_2 = "f00005e0Q-Q", + xsnegdp_2 = "f00005e4Q-Q", + xvmaxsp_3 = "f0000600QQQ", + xvnmaddasp_3 = "f0000608QQQ", + ["xvcmpeqsp._3"] = "f0000618QQQ", + xvcvspuxds_2 = "f0000620Q-Q", + xvcvdpsp_2 = "f0000624Q-Q", + xvminsp_3 = "f0000640QQQ", + xvnmaddmsp_3 = "f0000648QQQ", + ["xvcmpgtsp._3"] = "f0000658QQQ", + xvcvspsxds_2 = "f0000660Q-Q", + xvabssp_2 = "f0000664Q-Q", + xvcpsgnsp_3 = "f0000680QQQ", + xvnmsubasp_3 = "f0000688QQQ", + ["xvcmpgesp._3"] = "f0000698QQQ", + xvcvuxdsp_2 = "f00006a0Q-Q", + xvnabssp_2 = "f00006a4Q-Q", + xvnmsubmsp_3 = "f00006c8QQQ", + xvcvsxdsp_2 = "f00006e0Q-Q", + xvnegsp_2 = "f00006e4Q-Q", + xvmaxdp_3 = "f0000700QQQ", + xvnmaddadp_3 = "f0000708QQQ", + ["xvcmpeqdp._3"] = "f0000718QQQ", + xvcvdpuxds_2 = "f0000720Q-Q", + xvcvspdp_2 = "f0000724Q-Q", + xvmindp_3 = "f0000740QQQ", + xvnmaddmdp_3 = "f0000748QQQ", + ["xvcmpgtdp._3"] = "f0000758QQQ", + xvcvdpsxds_2 = "f0000760Q-Q", + xvabsdp_2 = "f0000764Q-Q", + xvcpsgndp_3 = "f0000780QQQ", + xvnmsubadp_3 = "f0000788QQQ", + ["xvcmpgedp._3"] = "f0000798QQQ", + xvcvuxddp_2 = "f00007a0Q-Q", + xvnabsdp_2 = "f00007a4Q-Q", + xvnmsubmdp_3 = "f00007c8QQQ", + xvcvsxddp_2 = "f00007e0Q-Q", + xvnegdp_2 = "f00007e4Q-Q", + + -- Primary opcode 61: + 
stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4. + + -- Primary opcode 62: + stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 63: + fdiv_3 = "fc000024FFF.", + fsub_3 = "fc000028FFF.", + fadd_3 = "fc00002aFFF.", + fsqrt_2 = "fc00002cF-F.", + fsel_4 = "fc00002eFFFF~.", + fre_2 = "fc000030F-F.", + fmul_3 = "fc000032FF-F.", + frsqrte_2 = "fc000034F-F.", + fmsub_4 = "fc000038FFFF~.", + fmadd_4 = "fc00003aFFFF~.", + fnmsub_4 = "fc00003cFFFF~.", + fnmadd_4 = "fc00003eFFFF~.", + fcmpu_3 = "fc000000XFF", + fcpsgn_3 = "fc000010FFF.", + fcmpo_3 = "fc000040XFF", + mtfsb1_1 = "fc00004cA", + fneg_2 = "fc000050F-F.", + mcrfs_2 = "fc000080XX", + mtfsb0_1 = "fc00008cA", + fmr_2 = "fc000090F-F.", + frsp_2 = "fc000018F-F.", + fctiw_2 = "fc00001cF-F.", + fctiwz_2 = "fc00001eF-F.", + ftdiv_2 = "fc000100X-F.", + fctiwu_2 = "fc00011cF-F.", + fctiwuz_2 = "fc00011eF-F.", + mtfsfi_2 = "fc00010cAA", -- NYI: upshift. + fnabs_2 = "fc000110F-F.", + ftsqrt_2 = "fc000140X-F.", + fabs_2 = "fc000210F-F.", + frin_2 = "fc000310F-F.", + friz_2 = "fc000350F-F.", + frip_2 = "fc000390F-F.", + frim_2 = "fc0003d0F-F.", + mffs_1 = "fc00048eF.", + -- NYI: mtfsf, mtfsb0, mtfsb1. 
+ fctid_2 = "fc00065cF-F.", + fctidz_2 = "fc00065eF-F.", + fmrgow_3 = "fc00068cFFF", + fcfid_2 = "fc00069cF-F.", + fctidu_2 = "fc00075cF-F.", + fctiduz_2 = "fc00075eF-F.", + fmrgew_3 = "fc00078cFFF", + fcfidu_2 = "fc00079cF-F.", + + daddq_3 = "fc000004F:F:F:.", + dquaq_4 = "fc000006F:F:F:Z.", + dmulq_3 = "fc000044F:F:F:.", + drrndq_4 = "fc000046F:F:F:Z.", + dscliq_3 = "fc000084F:F:6.", + dquaiq_4 = "fc000086SF:~F:Z.", + dscriq_3 = "fc0000c4F:F:6.", + drintxq_4 = "fc0000c61F:~F:Z.", + dcmpoq_3 = "fc000104XF:F:", + dtstexq_3 = "fc000144XF:F:", + dtstdcq_3 = "fc000184XF:6", + dtstdgq_3 = "fc0001c4XF:6", + drintnq_4 = "fc0001c61F:~F:Z.", + dctqpq_2 = "fc000204F:-F:.", + dctfixq_2 = "fc000244F:-F:.", + ddedpdq_3 = "fc000284ZF:~F:.", + dxexq_2 = "fc0002c4F:-F:.", + dsubq_3 = "fc000404F:F:F:.", + ddivq_3 = "fc000444F:F:F:.", + dcmpuq_3 = "fc000504XF:F:", + dtstsfq_3 = "fc000544XF:F:", + drdpq_2 = "fc000604F:-F:.", + dcffixq_2 = "fc000644F:-F:.", + denbcdq_3 = "fc000684YF:~F:.", + diexq_3 = "fc0006c4F:FF:.", + + -- Primary opcode 4, SPE APU extension: + evaddw_3 = "10000200RRR", + evaddiw_3 = "10000202RAR~", + evsubw_3 = "10000204RRR~", + evsubiw_3 = "10000206RAR~", + evabs_2 = "10000208RR", + evneg_2 = "10000209RR", + evextsb_2 = "1000020aRR", + evextsh_2 = "1000020bRR", + evrndw_2 = "1000020cRR", + evcntlzw_2 = "1000020dRR", + evcntlsw_2 = "1000020eRR", + brinc_3 = "1000020fRRR", + evand_3 = "10000211RRR", + evandc_3 = "10000212RRR", + evxor_3 = "10000216RRR", + evor_3 = "10000217RRR", + evmr_2 = "10000217RR=", + evnor_3 = "10000218RRR", + evnot_2 = "10000218RR=", + eveqv_3 = "10000219RRR", + evorc_3 = "1000021bRRR", + evnand_3 = "1000021eRRR", + evsrwu_3 = "10000220RRR", + evsrws_3 = "10000221RRR", + evsrwiu_3 = "10000222RRA", + evsrwis_3 = "10000223RRA", + evslw_3 = "10000224RRR", + evslwi_3 = "10000226RRA", + evrlw_3 = "10000228RRR", + evsplati_2 = "10000229RS", + evrlwi_3 = "1000022aRRA", + evsplatfi_2 = "1000022bRS", + evmergehi_3 = "1000022cRRR", + evmergelo_3 = 
"1000022dRRR", + evcmpgtu_3 = "10000230XRR", + evcmpgtu_2 = "10000230-RR", + evcmpgts_3 = "10000231XRR", + evcmpgts_2 = "10000231-RR", + evcmpltu_3 = "10000232XRR", + evcmpltu_2 = "10000232-RR", + evcmplts_3 = "10000233XRR", + evcmplts_2 = "10000233-RR", + evcmpeq_3 = "10000234XRR", + evcmpeq_2 = "10000234-RR", + evsel_4 = "10000278RRRW", + evsel_3 = "10000278RRR", + evfsadd_3 = "10000280RRR", + evfssub_3 = "10000281RRR", + evfsabs_2 = "10000284RR", + evfsnabs_2 = "10000285RR", + evfsneg_2 = "10000286RR", + evfsmul_3 = "10000288RRR", + evfsdiv_3 = "10000289RRR", + evfscmpgt_3 = "1000028cXRR", + evfscmpgt_2 = "1000028c-RR", + evfscmplt_3 = "1000028dXRR", + evfscmplt_2 = "1000028d-RR", + evfscmpeq_3 = "1000028eXRR", + evfscmpeq_2 = "1000028e-RR", + evfscfui_2 = "10000290R-R", + evfscfsi_2 = "10000291R-R", + evfscfuf_2 = "10000292R-R", + evfscfsf_2 = "10000293R-R", + evfsctui_2 = "10000294R-R", + evfsctsi_2 = "10000295R-R", + evfsctuf_2 = "10000296R-R", + evfsctsf_2 = "10000297R-R", + evfsctuiz_2 = "10000298R-R", + evfsctsiz_2 = "1000029aR-R", + evfststgt_3 = "1000029cXRR", + evfststgt_2 = "1000029c-RR", + evfststlt_3 = "1000029dXRR", + evfststlt_2 = "1000029d-RR", + evfststeq_3 = "1000029eXRR", + evfststeq_2 = "1000029e-RR", + efsadd_3 = "100002c0RRR", + efssub_3 = "100002c1RRR", + efsabs_2 = "100002c4RR", + efsnabs_2 = "100002c5RR", + efsneg_2 = "100002c6RR", + efsmul_3 = "100002c8RRR", + efsdiv_3 = "100002c9RRR", + efscmpgt_3 = "100002ccXRR", + efscmpgt_2 = "100002cc-RR", + efscmplt_3 = "100002cdXRR", + efscmplt_2 = "100002cd-RR", + efscmpeq_3 = "100002ceXRR", + efscmpeq_2 = "100002ce-RR", + efscfd_2 = "100002cfR-R", + efscfui_2 = "100002d0R-R", + efscfsi_2 = "100002d1R-R", + efscfuf_2 = "100002d2R-R", + efscfsf_2 = "100002d3R-R", + efsctui_2 = "100002d4R-R", + efsctsi_2 = "100002d5R-R", + efsctuf_2 = "100002d6R-R", + efsctsf_2 = "100002d7R-R", + efsctuiz_2 = "100002d8R-R", + efsctsiz_2 = "100002daR-R", + efststgt_3 = "100002dcXRR", + efststgt_2 = "100002dc-RR", + 
efststlt_3 = "100002ddXRR", + efststlt_2 = "100002dd-RR", + efststeq_3 = "100002deXRR", + efststeq_2 = "100002de-RR", + efdadd_3 = "100002e0RRR", + efdsub_3 = "100002e1RRR", + efdcfuid_2 = "100002e2R-R", + efdcfsid_2 = "100002e3R-R", + efdabs_2 = "100002e4RR", + efdnabs_2 = "100002e5RR", + efdneg_2 = "100002e6RR", + efdmul_3 = "100002e8RRR", + efddiv_3 = "100002e9RRR", + efdctuidz_2 = "100002eaR-R", + efdctsidz_2 = "100002ebR-R", + efdcmpgt_3 = "100002ecXRR", + efdcmpgt_2 = "100002ec-RR", + efdcmplt_3 = "100002edXRR", + efdcmplt_2 = "100002ed-RR", + efdcmpeq_3 = "100002eeXRR", + efdcmpeq_2 = "100002ee-RR", + efdcfs_2 = "100002efR-R", + efdcfui_2 = "100002f0R-R", + efdcfsi_2 = "100002f1R-R", + efdcfuf_2 = "100002f2R-R", + efdcfsf_2 = "100002f3R-R", + efdctui_2 = "100002f4R-R", + efdctsi_2 = "100002f5R-R", + efdctuf_2 = "100002f6R-R", + efdctsf_2 = "100002f7R-R", + efdctuiz_2 = "100002f8R-R", + efdctsiz_2 = "100002faR-R", + efdtstgt_3 = "100002fcXRR", + efdtstgt_2 = "100002fc-RR", + efdtstlt_3 = "100002fdXRR", + efdtstlt_2 = "100002fd-RR", + efdtsteq_3 = "100002feXRR", + efdtsteq_2 = "100002fe-RR", + evlddx_3 = "10000300RR0R", + evldd_2 = "10000301R8", + evldwx_3 = "10000302RR0R", + evldw_2 = "10000303R8", + evldhx_3 = "10000304RR0R", + evldh_2 = "10000305R8", + evlwhex_3 = "10000310RR0R", + evlwhe_2 = "10000311R4", + evlwhoux_3 = "10000314RR0R", + evlwhou_2 = "10000315R4", + evlwhosx_3 = "10000316RR0R", + evlwhos_2 = "10000317R4", + evstddx_3 = "10000320RR0R", + evstdd_2 = "10000321R8", + evstdwx_3 = "10000322RR0R", + evstdw_2 = "10000323R8", + evstdhx_3 = "10000324RR0R", + evstdh_2 = "10000325R8", + evstwhex_3 = "10000330RR0R", + evstwhe_2 = "10000331R4", + evstwhox_3 = "10000334RR0R", + evstwho_2 = "10000335R4", + evstwwex_3 = "10000338RR0R", + evstwwe_2 = "10000339R4", + evstwwox_3 = "1000033cRR0R", + evstwwo_2 = "1000033dR4", + evmhessf_3 = "10000403RRR", + evmhossf_3 = "10000407RRR", + evmheumi_3 = "10000408RRR", + evmhesmi_3 = "10000409RRR", + evmhesmf_3 = 
"1000040bRRR", + evmhoumi_3 = "1000040cRRR", + evmhosmi_3 = "1000040dRRR", + evmhosmf_3 = "1000040fRRR", + evmhessfa_3 = "10000423RRR", + evmhossfa_3 = "10000427RRR", + evmheumia_3 = "10000428RRR", + evmhesmia_3 = "10000429RRR", + evmhesmfa_3 = "1000042bRRR", + evmhoumia_3 = "1000042cRRR", + evmhosmia_3 = "1000042dRRR", + evmhosmfa_3 = "1000042fRRR", + evmwhssf_3 = "10000447RRR", + evmwlumi_3 = "10000448RRR", + evmwhumi_3 = "1000044cRRR", + evmwhsmi_3 = "1000044dRRR", + evmwhsmf_3 = "1000044fRRR", + evmwssf_3 = "10000453RRR", + evmwumi_3 = "10000458RRR", + evmwsmi_3 = "10000459RRR", + evmwsmf_3 = "1000045bRRR", + evmwhssfa_3 = "10000467RRR", + evmwlumia_3 = "10000468RRR", + evmwhumia_3 = "1000046cRRR", + evmwhsmia_3 = "1000046dRRR", + evmwhsmfa_3 = "1000046fRRR", + evmwssfa_3 = "10000473RRR", + evmwumia_3 = "10000478RRR", + evmwsmia_3 = "10000479RRR", + evmwsmfa_3 = "1000047bRRR", + evmra_2 = "100004c4RR", + evdivws_3 = "100004c6RRR", + evdivwu_3 = "100004c7RRR", + evmwssfaa_3 = "10000553RRR", + evmwumiaa_3 = "10000558RRR", + evmwsmiaa_3 = "10000559RRR", + evmwsmfaa_3 = "1000055bRRR", + evmwssfan_3 = "100005d3RRR", + evmwumian_3 = "100005d8RRR", + evmwsmian_3 = "100005d9RRR", + evmwsmfan_3 = "100005dbRRR", + evmergehilo_3 = "1000022eRRR", + evmergelohi_3 = "1000022fRRR", + evlhhesplatx_3 = "10000308RR0R", + evlhhesplat_2 = "10000309R2", + evlhhousplatx_3 = "1000030cRR0R", + evlhhousplat_2 = "1000030dR2", + evlhhossplatx_3 = "1000030eRR0R", + evlhhossplat_2 = "1000030fR2", + evlwwsplatx_3 = "10000318RR0R", + evlwwsplat_2 = "10000319R4", + evlwhsplatx_3 = "1000031cRR0R", + evlwhsplat_2 = "1000031dR4", + evaddusiaaw_2 = "100004c0RR", + evaddssiaaw_2 = "100004c1RR", + evsubfusiaaw_2 = "100004c2RR", + evsubfssiaaw_2 = "100004c3RR", + evaddumiaaw_2 = "100004c8RR", + evaddsmiaaw_2 = "100004c9RR", + evsubfumiaaw_2 = "100004caRR", + evsubfsmiaaw_2 = "100004cbRR", + evmheusiaaw_3 = "10000500RRR", + evmhessiaaw_3 = "10000501RRR", + evmhessfaaw_3 = "10000503RRR", + 
evmhousiaaw_3 = "10000504RRR", + evmhossiaaw_3 = "10000505RRR", + evmhossfaaw_3 = "10000507RRR", + evmheumiaaw_3 = "10000508RRR", + evmhesmiaaw_3 = "10000509RRR", + evmhesmfaaw_3 = "1000050bRRR", + evmhoumiaaw_3 = "1000050cRRR", + evmhosmiaaw_3 = "1000050dRRR", + evmhosmfaaw_3 = "1000050fRRR", + evmhegumiaa_3 = "10000528RRR", + evmhegsmiaa_3 = "10000529RRR", + evmhegsmfaa_3 = "1000052bRRR", + evmhogumiaa_3 = "1000052cRRR", + evmhogsmiaa_3 = "1000052dRRR", + evmhogsmfaa_3 = "1000052fRRR", + evmwlusiaaw_3 = "10000540RRR", + evmwlssiaaw_3 = "10000541RRR", + evmwlumiaaw_3 = "10000548RRR", + evmwlsmiaaw_3 = "10000549RRR", + evmheusianw_3 = "10000580RRR", + evmhessianw_3 = "10000581RRR", + evmhessfanw_3 = "10000583RRR", + evmhousianw_3 = "10000584RRR", + evmhossianw_3 = "10000585RRR", + evmhossfanw_3 = "10000587RRR", + evmheumianw_3 = "10000588RRR", + evmhesmianw_3 = "10000589RRR", + evmhesmfanw_3 = "1000058bRRR", + evmhoumianw_3 = "1000058cRRR", + evmhosmianw_3 = "1000058dRRR", + evmhosmfanw_3 = "1000058fRRR", + evmhegumian_3 = "100005a8RRR", + evmhegsmian_3 = "100005a9RRR", + evmhegsmfan_3 = "100005abRRR", + evmhogumian_3 = "100005acRRR", + evmhogsmian_3 = "100005adRRR", + evmhogsmfan_3 = "100005afRRR", + evmwlusianw_3 = "100005c0RRR", + evmwlssianw_3 = "100005c1RRR", + evmwlumianw_3 = "100005c8RRR", + evmwlsmianw_3 = "100005c9RRR", + + -- NYI: Book E instructions. +} + +-- Add mnemonics for "." variants. +do + local t = {} + for k,v in pairs(map_op) do + if type(v) == "string" and sub(v, -1) == "." then + local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) + t[sub(k, 1, -3).."."..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +-- Add more branch mnemonics. 
-- For every condition name in map_cond, synthesize the conditional branch
-- mnemonics. c1 places the low two bits of the condition code at bit 16 and
-- adds 0x01000000 when the code is below 4.
-- NOTE(review): presumably these are the BI bit index and the BO
-- "branch if true" bit -- confirm against map_cond.
for cond,c in pairs(map_cond) do
  local b1 = "b"..cond
  local c1 = shl(band(c, 3), 16) + (c < 4 and 0x01000000 or 0)
  -- bX[l]: "_1" takes only a label ("K"); "_2" takes a CR field and a label
  -- ("-" skips one 5 bit field, "X" consumes the CR operand).
  map_op[b1.."_1"] = tohex(0x40800000 + c1).."K"
  map_op[b1.."y_1"] = tohex(0x40a00000 + c1).."K"
  map_op[b1.."l_1"] = tohex(0x40800001 + c1).."K"
  map_op[b1.."_2"] = tohex(0x40800000 + c1).."-XK"
  map_op[b1.."y_2"] = tohex(0x40a00000 + c1).."-XK"
  map_op[b1.."l_2"] = tohex(0x40800001 + c1).."-XK"
  -- bXlr[l] and bXctr[l] without operands.
  map_op[b1.."lr_0"] = tohex(0x4c800020 + c1)
  map_op[b1.."lrl_0"] = tohex(0x4c800021 + c1)
  map_op[b1.."ctr_0"] = tohex(0x4c800420 + c1)
  map_op[b1.."ctrl_0"] = tohex(0x4c800421 + c1)
  -- bXlr[l] and bXctr[l] with an explicit CR field operand.
  map_op[b1.."lr_1"] = tohex(0x4c800020 + c1).."-X"
  map_op[b1.."lrl_1"] = tohex(0x4c800021 + c1).."-X"
  map_op[b1.."ctr_1"] = tohex(0x4c800420 + c1).."-X"
  map_op[b1.."ctrl_1"] = tohex(0x4c800421 + c1).."-X"
end

------------------------------------------------------------------------------

-- Parse a general purpose register operand.
-- Accepts "rN" (N = 0..31), a type name registered in map_type (its default
-- register is used), or "type:rN" to override the register of a type.
-- Returns the register number and the map_type entry (nil for a plain "rN").
-- Raises an error via werror() for anything else.
local function parse_gpr(expr)
  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local r = match(expr, "^r([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    -- The pattern admits up to "r39", so the range is checked separately.
    if r <= 31 then return r, tp end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a floating point register operand "fN" (N = 0..31).
local function parse_fpr(expr)
  local r = match(expr, "^f([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 31 then return r end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a vector register operand "vN" (N = 0..31).
local function parse_vr(expr)
  local r = match(expr, "^v([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 31 then return r end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a vector-scalar register operand "vsN" (N = 0..63).
local function parse_vs(expr)
  local r = match(expr, "^vs([1-6]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 63 then return r end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a condition register field "crN" (N = 0..7).
local function parse_cr(expr)
  local r = match(expr, "^cr([0-7])$")
  if r then return tonumber(r) end
  werror("bad condition register name `"..expr.."'")
end

-- Parse a condition bit written as "4*crN+cc", where cc is a two character
-- name from map_cond with a code below 4. Returns the bit number N*4+code.
local function parse_cond(expr)
  local r, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$")
  if r then
    r = tonumber(r)
    local c = map_cond[cond]
    if c and c < 4 then return r*4+c end
  end
  werror("bad condition bit name `"..expr.."'")
end

-- Environment table used when evaluating operand expressions.
local parse_ctx = {}

-- Compile a chunk with parse_ctx as its environment. Uses loadstring+setfenv
-- when setfenv exists, otherwise load() with an explicit environment.
-- Returns the compiled function, or nil if the chunk does not compile.
local loadenv = setfenv and function(s)
  local code = loadstring(s, "")
  if code then setfenv(code, parse_ctx) end
  return code
end or function(s)
  return load(s, "", nil, parse_ctx)
end

-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-- Returns the value of n, or nil if n neither converts with tonumber() nor
-- evaluates without error as an expression.
local function parse_number(n)
  local x = tonumber(n)
  if x then return x end
  local code = loadenv("return "..n)
  if code then
    local ok, y = pcall(code)
    if ok then return y end
  end
  return nil
end

-- Parse an immediate operand and return it positioned in the opcode.
--   bits:   width of the field.
--   shift:  left shift applied to the encoded field.
--   scale:  the value must be a multiple of 2^scale and is stored >> scale.
--   signed: accept negative values and encode them modulo 2^bits.
-- Constant operands are range checked and encoded right here. A register
-- name is rejected. Anything else emits an IMM action carrying the packed
-- field description plus the expression text, and 0 is returned.
local function parse_imm(imm, bits, shift, scale, signed)
  local n = parse_number(imm)
  if n then
    local m = sar(n, scale)
    -- Only accept values that survive the scaling round trip.
    if shl(m, scale) == n then
      if signed then
        local s = sar(m, bits-1)
        if s == 0 then return shl(m, shift)
        elseif s == -1 then return shl(m + shl(1, bits), shift) end
      else
        if sar(m, bits) == 0 then return shl(m, shift) end
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
         match(imm, "^vs([1-6]?[0-9])$") or
         match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
    werror("expected immediate operand, got register")
  else
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
end

-- Parse a 6 bit shift count (isshift = true) or mask position (isshift =
-- false). The low 5 bits and bit 5 of the value go to different places in
-- the opcode, depending on isshift. Non-constant operands emit an IMMSH
-- action and return 0.
local function parse_shiftmask(imm, isshift)
  local n = parse_number(imm)
  if n then
    if shr(n, 6) == 0 then
      local lsb = band(n, 31)
      local msb = n - lsb -- Either 0 or 32.
      return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
    end
    werror("out of range immediate `"..imm.."'")
  elseif match(imm, "^r([1-3]?[0-9])$") or
         match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
    werror("expected immediate operand, got register")
  else
    waction("IMMSH", isshift and 1 or 0, imm)
    return 0;
  end
end

-- Parse a displacement operand with a signed 16 bit offset.
-- Accepts "imm(reg)", or a type name (optionally "type:reg") followed by a
-- member expression, which emits an IMM action built from the ctypefmt of
-- the type. The base register is placed at bit 16. r0 is rejected as base.
local function parse_disp(disp)
  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
  if imm then
    local r = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    return shl(r, 16) + parse_imm(imm, 16, 0, 0, true)
  end
  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
  if reg and tailr ~= "" then
    local r, tp = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    if tp then
      waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr))
      return shl(r, 16)
    end
  end
  werror("bad displacement `"..disp.."'")
end

-- Same as parse_disp(), but for an unsigned 5 bit offset which is stored
-- scaled by 2^scale at bit 11.
local function parse_u5disp(disp, scale)
  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
  if imm then
    local r = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    return shl(r, 16) + parse_imm(imm, 5, 11, scale, false)
  end
  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
  if reg and tailr ~= "" then
    local r, tp = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    if tp then
      waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr))
      return shl(r, 16)
    end
  end
  werror("bad displacement `"..disp.."'")
end

-- Parse a label operand. def selects definition (true) or reference (false).
-- Returns the mode ("PC", "LG" or "EXT"), the label number and, for pc
-- labels only, the expression text.
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  -- =>label (pc label reference)
  if prefix == "=>" then
    return "PC", 0, sub(label, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      -- lnum is a string here; the addition converts it to a number.
      return "LG", lnum + (dir == ">" and 0 or 10)
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end

------------------------------------------------------------------------------

-- Handle opcodes defined with template strings.
-- A template is 8 hex digits of base opcode followed by one character per
-- operand or modifier. Without params only the operand part of the template
-- is returned. Otherwise every template character is processed in order:
-- most of them consume params[n] and add the encoded operand to the opcode.
-- rs is the bit position of the most recently placed field. It starts at 26
-- and is lowered by the field width before each new field is placed.
op_template = function(params, template, nparams)
  if not params then return sub(template, 9) end
  local op = tonumber(sub(template, 1, 8), 16)
  local n, rs = 1, 26

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 3 positions (rlwinm).
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()

  -- Process each character.
  for p in gmatch(sub(template, 9), ".") do
    if p == "R" then
      rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
    elseif p == "F" then
      rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
    elseif p == "V" then
      rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
    elseif p == "Q" then
      -- 6 bit register number: low 5 bits go to the regular field, bit 5 is
      -- moved to a separate position that depends on the field.
      local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
      local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
      op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
    elseif p == "q" then
      -- Like "Q", but always placed at bit 21 with bit 5 moved to bit 0.
      local vs = parse_vs(params[n]); n = n + 1
      op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
    elseif p == "A" then
      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
    elseif p == "S" then
      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1
    elseif p == "I" then
      op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1
    elseif p == "U" then
      op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1
    elseif p == "D" then
      op = op + parse_disp(params[n]); n = n + 1
    elseif p == "2" then
      op = op + parse_u5disp(params[n], 1); n = n + 1
    elseif p == "4" then
      op = op + parse_u5disp(params[n], 2); n = n + 1
    elseif p == "8" then
      op = op + parse_u5disp(params[n], 3); n = n + 1
    elseif p == "C" then
      rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
    elseif p == "X" then
      rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
    elseif p == "1" then
      rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
    elseif p == "g" then
      rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
    elseif p == "3" then
      rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
    elseif p == "P" then
      rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
    elseif p == "p" then
      op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
    elseif p == "6" then
      rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
    elseif p == "Y" then
      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
    elseif p == "y" then
      rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
    elseif p == "Z" then
      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
    elseif p == "z" then
      rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
    elseif p == "W" then
      op = op + parse_cr(params[n]); n = n + 1
    elseif p == "G" then
      op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1
    elseif p == "H" then
      op = op + parse_shiftmask(params[n], true); n = n + 1
    elseif p == "M" then
      op = op + parse_shiftmask(params[n], false); n = n + 1
    elseif p == "J" or p == "K" then
      -- Branch target: emits a REL_* action. "K" adds 2048 to the label
      -- number passed along with the action.
      local mode, m, s = parse_label(params[n], false)
      if p == "K" then m = m + 2048 end
      waction("REL_"..mode, m, s, 1)
      n = n + 1
    elseif p == "0" then
      -- Check only: the most recently placed field must not be zero.
      if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
    elseif p == "=" or p == "%" then
      -- Duplicate the last ("=") or second to last ("%") 5 bit field into
      -- the next field. Consumes no operand.
      local t = band(shr(op, p == "%" and rs+5 or rs), 31)
      rs = rs - 5
      op = op + shl(t, rs)
    elseif p == "~" then
      -- Swap the two most recently placed 5 bit fields.
      local mm = shl(31, rs)
      local lo = band(op, mm)
      local hi = band(op, shl(mm, 5))
      op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
    elseif p == ":" then
      -- Check only: the most recently placed field must be even.
      if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
    elseif p == "-" then
      -- Skip one 5 bit field.
      rs = rs - 5
    elseif p == "." then
      -- Ignored.
    else
      assert(false)
    end
  end
  wputpos(pos, op)
end

map_op[".template__"] = op_template

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
-- Without params the operand syntax description is returned. Otherwise a
-- LABEL_LG or LABEL_PC action is emitted. Extern names cannot be defined.
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  if secpos+1 > maxsecpos then wflush() end
  local mode, n, s = parse_label(params[1], true)
  if mode == "EXT" then werror("bad label definition") end
  waction("LABEL_"..mode, n, s, 1)
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
-- Every operand must convert with tonumber(). Negative values are wrapped
-- by adding 2^32 before the word is written.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _,p in ipairs(params) do
    local n = tonumber(p)
    if not n then werror("bad immediate `"..p.."'") end
    if n < 0 then n = n + 2^32 end
    wputw(n)
    -- The buffer position check runs after each word has been written.
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- Alignment pseudo-opcode.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    -- Halve up to 8 times. Hitting exactly 1 proves a power of two.
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: name, ctype and an optional default register. Registers the
-- type in map_type, adds a "#name" define holding sizeof(ctype) and emits a
-- DtN() helper macro into the output, numbered by the running ctypenum.
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
-- The two operand form shares the handler. reg is nil in that case.
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
-- Writes one line per type (name, C type, default register), sorted by name.
local function dumptypes(out, lvl)
  local t = {}
  for name in pairs(map_type) do t[#t+1] = name end
  sort(t)
  out:write("Type definitions:\n")
  for _,name in ipairs(t) do
    local tp = map_type[name]
    local reg = tp.reg or ""
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
function _M.section(num)
  waction("SECTION", num)
  wflush(true) -- SECTION is a terminal action.
end

------------------------------------------------------------------------------

-- Dump architecture description.
function _M.dumparch(out)
  out:write(format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release))
  dumpactions(out)
end

-- Dump all user defined elements.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Pass callbacks from/to the DynASM core.
-- Stores the four writer/error callbacks in module-level variables and hands
-- back wflush.
function _M.passcb(wl, we, wf, ww)
  wline, werror, wfatal, wwarn = wl, we, wf, ww
  return wflush
end

-- Setup the arch-specific module.
function _M.setup(arch, opt)
  g_arch, g_opt = arch, opt
end

-- Merge the core maps and the arch-specific maps.
-- Lookups that miss in map_op fall back to map_coreop, and lookups that miss
-- in map_def fall back to map_archdef. Returns both tables.
function _M.mergemaps(map_coreop, map_def)
  setmetatable(map_op, { __index = map_coreop })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end

return _M

------------------------------------------------------------------------------

diff --git a/lib/LuaJIT/dynasm/dasm_proto.h b/lib/LuaJIT/dynasm/dasm_proto.h
new file mode 100644
index 0000000..59d9e2b
--- /dev/null
+++ b/lib/LuaJIT/dynasm/dasm_proto.h
@@ -0,0 +1,83 @@
/*
** DynASM encoding engine prototypes.
** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

#ifndef _DASM_PROTO_H
#define _DASM_PROTO_H

#include <stddef.h>
#include <stdarg.h>

#define DASM_IDENT "DynASM 1.4.0"
#define DASM_VERSION 10400 /* 1.4.0 */

/* Every hook below can be predefined by the includer to override it. */

/* Declaration of the state parameter taken by all API functions. */
#ifndef Dst_DECL
#define Dst_DECL dasm_State **Dst
#endif

/* Expression yielding the dasm_State pointer from that parameter. */
#ifndef Dst_REF
#define Dst_REF (*Dst)
#endif

/* Linkage specifier for the API functions. */
#ifndef DASM_FDEF
#define DASM_FDEF extern
#endif

/*
** Grow the buffer p (element type t) to hold at least 'need' bytes.
** The size starts at 16 and doubles until it is large enough. sz is updated
** to the new size. The default implementation exits the process when
** realloc() fails. It uses realloc() and exit(), but this header does not
** include <stdlib.h> itself.
*/
#ifndef DASM_M_GROW
#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      (p) = (t *)realloc((p), _sz); \
      if ((p) == NULL) exit(1); \
      (sz) = _sz; \
    } \
  } while(0)
#endif

/* Release a buffer obtained via DASM_M_GROW. The default ignores ctx and sz. */
#ifndef DASM_M_FREE
#define DASM_M_FREE(ctx, p, sz) free(p)
#endif

/* Internal DynASM encoder state. */
typedef struct dasm_State dasm_State;


/* Initialize and free DynASM state. */
DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
DASM_FDEF void dasm_free(Dst_DECL);

/* Setup global array. Must be called before dasm_setup(). */
DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);

/* Grow PC label array. Can be called after dasm_setup(), too. */
DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);

/* Setup encoder. */
DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);

/* Feed encoder with actions. Calls are generated by pre-processor. */
DASM_FDEF void dasm_put(Dst_DECL, int start, ...);

/* Link sections and return the resulting size. */
DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);

/* Encode sections into buffer. */
DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);

/* Get PC label offset. */
DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);

#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
#else
/* Without DASM_CHECKS the checker collapses to a constant 0. */
#define dasm_checkstep(a, b) 0
#endif


#endif /* _DASM_PROTO_H */
diff --git a/lib/LuaJIT/dynasm/dasm_x64.lua b/lib/LuaJIT/dynasm/dasm_x64.lua
new file mode 100644
index 0000000..e8bdeb3
--- /dev/null
+++ b/lib/LuaJIT/dynasm/dasm_x64.lua
@@ -0,0 +1,12 @@
------------------------------------------------------------------------------
-- DynASM x64 module.
--
-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
-- This module just sets 64 bit mode for the combined x86/x64 module.
-- All the interesting stuff is there.
------------------------------------------------------------------------------

x64 = true -- Using a global is an ugly, but effective solution.
return require("dasm_x86")
diff --git a/lib/LuaJIT/dynasm/dasm_x86.h b/lib/LuaJIT/dynasm/dasm_x86.h
new file mode 100644
index 0000000..dc14d88
--- /dev/null
+++ b/lib/LuaJIT/dynasm/dasm_x86.h
@@ -0,0 +1,509 @@
/*
** DynASM x86 encoding engine.
** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/

#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>

#define DASM_ARCH "x86"

/* Hook to resolve extern names. The default resolves everything to 0. */
#ifndef DASM_EXTERN
#define DASM_EXTERN(a,b,c,d) 0
#endif

/* Action definitions. DASM_STOP must be 255.
*/
/* The 23 enumerators are numbered consecutively from 233 up to 255. */
enum {
  DASM_DISP = 233,
  DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
  DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
  DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
  DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
};

/* Maximum number of section buffer positions for a single dasm_put() call. */
#define DASM_MAXSECPOS 25

/* DynASM encoder status codes. Action list offset or number are or'ed in. */
#define DASM_S_OK 0x00000000
#define DASM_S_NOMEM 0x01000000
#define DASM_S_PHASE 0x02000000
#define DASM_S_MATCH_SEC 0x03000000
#define DASM_S_RANGE_I 0x11000000
#define DASM_S_RANGE_SEC 0x12000000
#define DASM_S_RANGE_LG 0x13000000
#define DASM_S_RANGE_PC 0x14000000
#define DASM_S_RANGE_VREG 0x15000000
#define DASM_S_UNDEF_L 0x21000000
#define DASM_S_UNDEF_PC 0x22000000

/* Macros to convert positions (8 bit section + 24 bit index). */
#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
#define DASM_SEC2POS(sec) ((sec)<<24)
#define DASM_POS2SEC(pos) ((pos)>>24)
/* Indexes the biased buffer with the full position, section bits included. */
#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))

/* Action list type. */
typedef const unsigned char *dasm_ActList;

/* Per-section structure. */
typedef struct dasm_Section {
  int *rbuf; /* Biased buffer pointer (negative section bias). */
  int *buf; /* True buffer pointer. */
  size_t bsize; /* Buffer size in bytes. */
  int pos; /* Biased buffer position. */
  int epos; /* End of biased buffer position - max single put. */
  int ofs; /* Byte offset into section. */
} dasm_Section;

/* Core structure holding the DynASM encoding state. */
struct dasm_State {
  size_t psize; /* Allocated size of this structure. */
  dasm_ActList actionlist; /* Current actionlist pointer. */
  int *lglabels; /* Local/global chain/pos ptrs. */
  size_t lgsize;
  int *pclabels; /* PC label chains/pos ptrs. */
  size_t pcsize;
  void **globals; /* Array of globals (bias -10). */
  dasm_Section *section; /* Pointer to active section. */
  size_t codesize; /* Total size of all code sections. */
  int maxsection; /* 0 <= sectionidx < maxsection. */
  int status; /* Status code. */
  dasm_Section sections[1]; /* All sections. Alloc-extended. */
};

/* The size of the core structure depends on the max. number of sections. */
#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))


/* Initialize DynASM state. */
/*
** Allocates the state via DASM_M_GROW and clears the label arrays and all
** section buffers. The fields actionlist, section, codesize and status, and
** the per-section pos and ofs, are not set here; dasm_setup() and
** dasm_link() assign them later.
*/
void dasm_init(Dst_DECL, int maxsection)
{
  dasm_State *D;
  size_t psz = 0;
  int i;
  Dst_REF = NULL;
  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
  D = Dst_REF;
  D->psize = psz;
  D->lglabels = NULL;
  D->lgsize = 0;
  D->pclabels = NULL;
  D->pcsize = 0;
  D->globals = NULL;
  D->maxsection = maxsection;
  for (i = 0; i < maxsection; i++) {
    D->sections[i].buf = NULL; /* Need this for pass3. */
    /* Derived from a NULL buf here. dasm_put() recomputes it after growing. */
    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
    D->sections[i].bsize = 0;
    D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
  }
}

/* Free DynASM state. */
/* Releases all section buffers, both label arrays and finally the state. */
void dasm_free(Dst_DECL)
{
  dasm_State *D = Dst_REF;
  int i;
  for (i = 0; i < D->maxsection; i++)
    if (D->sections[i].buf)
      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
  DASM_M_FREE(Dst, D, D->psize);
}

/* Setup global label array. Must be called before dasm_setup(). */
/* Also sizes lglabels for the 10 local label slots plus maxgl globals. */
void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
{
  dasm_State *D = Dst_REF;
  D->globals = gl - 10; /* Negative bias to compensate for locals. */
  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
}

/* Grow PC label array. Can be called after dasm_setup(), too.
*/
/* Only the newly added tail of the array is zeroed; old entries are kept. */
void dasm_growpc(Dst_DECL, unsigned int maxpc)
{
  dasm_State *D = Dst_REF;
  size_t osz = D->pcsize;
  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
}

/* Setup encoder. */
/*
** Resets status, active section, all labels and the per-section positions.
** lglabels is cleared without a NULL check (unlike pclabels), so it relies
** on dasm_setupglobal() having allocated it before.
*/
void dasm_setup(Dst_DECL, const void *actionlist)
{
  dasm_State *D = Dst_REF;
  int i;
  D->actionlist = (dasm_ActList)actionlist;
  D->status = DASM_S_OK;
  D->section = &D->sections[0];
  memset((void *)D->lglabels, 0, D->lgsize);
  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
  for (i = 0; i < D->maxsection; i++) {
    D->sections[i].pos = DASM_SEC2POS(i);
    D->sections[i].ofs = 0;
  }
}


/*
** Range checks for pass 1. They only exist with DASM_CHECKS defined. On
** failure they store a status code combined with the action list offset in
** D->status and return from the enclosing function. Both expect D and p
** (and pl for CKPL) to be in scope.
*/
#ifdef DASM_CHECKS
#define CK(x, st) \
  do { if (!(x)) { \
    D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
    D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
#endif

/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
/*
** 'start' is the offset of the first action in the action list. It is
** followed by one int vararg for each action that takes an argument.
** Bytes below DASM_DISP count as one byte of code each.
*/
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs, mrm = -1;
  int *b;

  /* Grow the section buffer ahead of time, so the loop need not check. */
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }

  b = sec->rbuf;
  b[pos++] = start;

  va_start(ap, start);
  while (1) {
    int action = *p++;
    if (action < DASM_DISP) {
      ofs++;
    } else if (action <= DASM_REL_A) {
      /* Actions DASM_DISP..DASM_REL_A: one int argument, one buffer slot. */
      int n = va_arg(ap, int);
      b[pos++] = n;
      switch (action) {
      case DASM_DISP:
        if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
        /* fallthrough */
      case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
      case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
      case DASM_IMM_D: ofs += 4; break;
      case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
      case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
      case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
      case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
      case DASM_SPACE: p++; ofs += n; break;
      case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
      case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
        if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
        if (*p < 0x20 && (n&7) == 4) ofs++;
        switch ((*p++ >> 3) & 3) {
        case 3: n |= b[pos-3]; /* fallthrough */
        case 2: n |= b[pos-2]; /* fallthrough */
        case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
        }
        continue;
      }
      mrm = -1;
    } else {
      int *pl, n;
      switch (action) {
      case DASM_REL_LG:
      case DASM_IMM_LG:
        n = *p++; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl -= 246; n = *pl;
        if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
      case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) { /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
      linkrel:
          b[pos] = n; /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        ofs += 4; /* Maximum offset needed. */
        if (action == DASM_REL_LG || action == DASM_REL_PC)
          b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
      putlabel:
        n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
        *pl = -pos; /* Label exists now. */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_ALIGN:
        ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */
        b[pos++] = ofs; /* Store pass1 offset estimate. */
        break;
      case DASM_EXTERN: p += 2; ofs += 4; break;
      case DASM_ESC: p++; ofs++; break;
      case DASM_MARK: mrm = p[-2]; break;
      case DASM_SECTION:
        n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
        /* fallthrough: a section switch also ends this dasm_put() call. */
      case DASM_STOP: goto stop;
      }
    }
  }
stop:
  va_end(ap);
  /* Written back to the section that was active on entry. */
  sec->pos = pos;
  sec->ofs = ofs;
}
#undef CK

/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */
/*
** Stores the total code size in *szp and D->codesize. Returns DASM_S_OK or,
** with DASM_CHECKS, an earlier error status or DASM_S_UNDEF_PC|pc.
*/
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;

#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif

  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }

  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;

    /* ofs holds the (non-positive) adjustment accumulated by shrinking. */
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        int op, action = *p++;
        switch (action) {
        case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
        case DASM_REL_PC: op = p[-2]; rel_pc: {
          int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
          if (shrink) { /* Shrinkable branch opcode? */
            int lofs, lpos = b[pos];
            if (lpos < 0) goto noshrink; /* Ext global? */
            lofs = *DASM_POS2PTR(D, lpos);
            if (lpos > pos) { /* Fwd label: add cumulative section offsets. */
              int i;
              for (i = secnum; i < DASM_POS2SEC(lpos); i++)
                lofs += D->sections[i].ofs;
            } else {
              lofs -= ofs; /* Bkwd label: unfix offset. */
            }
            lofs -= b[pos+1]; /* Short branch ok? */
            if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */
            else { noshrink: shrink = 0; } /* No, cannot shrink op. */
          }
          /* The offset estimate slot is reused to hold the shrink amount. */
          b[pos+1] = shrink;
          pos += 2;
          break;
        }
        /* fallthrough */
        case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
        /* fallthrough */
        case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
        case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
        case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
        case DASM_LABEL_LG: p++;
        /* fallthrough */
        case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
        case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
        case DASM_EXTERN: p += 2; break;
        case DASM_ESC: p++; break;
        case DASM_MARK: break;
        case DASM_SECTION: case DASM_STOP: goto stop;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs; /* Next section starts right after current section. */
  }

  D->codesize = ofs; /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}

/*
** Emit helpers for pass 3. They write through the local pointer cp and
** advance it. Without DASM_ALIGNED_WRITES multi-byte values are stored with
** a single (possibly unaligned) write, otherwise byte by byte, low byte first.
*/
#define dasmb(x) *cp++ = (unsigned char)(x)
#ifndef DASM_ALIGNED_WRITES
#define dasmw(x) \
  do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
#define dasmd(x) \
  do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
#else
#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
#endif

/* Pass 3: Encode sections. */
/*
** Writes the machine code for all sections into 'buffer'. Returns
** DASM_S_PHASE if the number of bytes written differs from the size
** computed by dasm_link(), otherwise DASM_S_OK.
*/
int dasm_encode(Dst_DECL, void *buffer)
{
  dasm_State *D = Dst_REF;
  unsigned char *base = (unsigned char *)buffer;
  unsigned char *cp = base;
  int secnum;

  /* Encode all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->buf;
    int *endb = sec->rbuf + sec->pos;

    while (b != endb) {
      dasm_ActList p = D->actionlist + *b++;
      unsigned char *mark = NULL;
      while (1) {
        int action = *p++;
        /* Actions DASM_DISP..DASM_ALIGN have a stored buffer slot. */
        int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0;
        switch (action) {
        case DASM_DISP: if (!mark) mark = cp; {
          unsigned char *mm = mark;
          if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
          if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
            if (mrm != 5) { mm[-1] -= 0x80; break; } }
          if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
        }
          /* fallthrough */
        case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
        case DASM_IMM_DB: if (((n+128)&-256) == 0) {
            db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
          } else mark = NULL;
          /* fallthrough */
        case DASM_IMM_D: wd: dasmd(n); break;
        case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
          /* fallthrough */
        case DASM_IMM_W: dasmw(n); break;
        case DASM_VREG: {
          int t = *p++;
          unsigned char *ex = cp - (t&7);
          if ((n & 8) && t < 0xa0) {
            if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
            n &= 7;
          } else if (n & 0x10) {
            if (*ex & 0x80) {
              *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
            }
            while (++ex < cp) ex[-1] = *ex;
            if (mark) mark--;
            cp--;
            n &= 7;
          }
          if (t >= 0xc0) n <<= 4;
          else if (t >= 0x40) n <<= 3;
          else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
          cp[-1] ^= n;
          break;
        }
        case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
          b++; n = (int)(ptrdiff_t)D->globals[-n];
          /* fallthrough */
        case DASM_REL_A: rel_a:
          n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
        case DASM_REL_PC: rel_pc: {
          int shrink = *b++;
          int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
          n = *pb - ((int)(cp-base) + 4-shrink);
          if (shrink == 0) goto wd;
          if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
          goto wb;
        }
        case DASM_IMM_LG:
          p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
          /* fallthrough */
        case DASM_IMM_PC: {
          int *pb = DASM_POS2PTR(D, n);
          n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
          goto wd;
        }
        case DASM_LABEL_LG: {
          int idx = *p++;
          /* Only indexes >= 10 are stored in the globals array. */
          if (idx >= 10)
            D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
          break;
        }
        case DASM_LABEL_PC: case DASM_SETLABEL: break;
        case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
        case DASM_ALIGN:
          n = *p++;
          while (((cp-base) & n)) *cp++ = 0x90; /* nop */
          break;
        case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
        case DASM_MARK: mark = cp; break;
        case DASM_ESC: action = *p++;
          /* fallthrough */
        default: *cp++ = action; break;
        case DASM_SECTION: case DASM_STOP: goto stop;
        }
      }
      stop: (void)0;
    }
  }

  if (base + D->codesize != cp) /* Check for phase errors. */
    return DASM_S_PHASE;
  return DASM_S_OK;
}

/* Get PC label offset. */
/*
** Returns the stored offset of a defined label, -1 for a label that is
** referenced but not defined, and -2 if it is unused or out of range.
*/
int dasm_getpclabel(Dst_DECL, unsigned int pc)
{
  dasm_State *D = Dst_REF;
  if (pc*sizeof(int) < D->pcsize) {
    int pos = D->pclabels[pc];
    if (pos < 0) return *DASM_POS2PTR(D, -pos);
    if (pos > 0) return -1; /* Undefined. */
  }
  return -2; /* Unused or out of range. */
}

#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps. */
/*
** Flags a local label slot (1..9) that still holds a pending reference
** chain and resets the slots checked before that. If secmatch >= 0, the
** active section must be sections[secmatch]. Returns the resulting status.
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  if (D->status == DASM_S_OK) {
    int i;
    for (i = 1; i <= 9; i++) {
      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; }
      D->lglabels[i] = 0;
    }
  }
  if (D->status == DASM_S_OK && secmatch >= 0 &&
      D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
  return D->status;
}
#endif

diff --git a/lib/LuaJIT/dynasm/dasm_x86.lua b/lib/LuaJIT/dynasm/dasm_x86.lua
new file mode 100644
index 0000000..7f536af
--- /dev/null
+++ b/lib/LuaJIT/dynasm/dasm_x86.lua
@@ -0,0 +1,2360 @@
------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

local x64 = x64

-- Module information:
local _info = {
  arch = x64 and "x64" or "x86",
  description = "DynASM x86/x64 module",
  version = "1.4.0",
  vernum = 10400,
  release = "2015-10-18",
  author = "Mike Pall",
  license = "MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }

-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
local concat, sort, remove = table.concat, table.sort, table.remove
local bit = bit or require("bit")
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift

-- Inherited tables and callbacks.
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG", "SPACE",
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL", "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG", "REL_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (link):
  "IMM_LG", "IMM_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG", "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (dynamically generated below).
local map_action = {}
-- First action number. Everything below does not need to be escaped.
local actfirst = 256-#action_names

-- Action list buffer and string (only used to remove dupes).
local actlist = {}
local actstr = ""

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

-- VREG kind encodings, pre-shifted by 5 bits.
local map_vreg = {
  ["modrm.rm.m"] = 0x00,
  ["modrm.rm.r"] = 0x20,
  ["opcode"] = 0x20,
  ["sib.base"] = 0x20,
  ["sib.index"] = 0x40,
  ["modrm.reg"] = 0x80,
  ["vex.v"] = 0xa0,
  ["imm.hi"] = 0xc0,
}

-- Current number of VREG actions contributing to REX/VEX shrinkage.
local vreg_shrink_count = 0

------------------------------------------------------------------------------

-- Compute action numbers for action names.
-- The names occupy the top of the byte range: actfirst .. 255.
for n,name in ipairs(action_names) do
  local num = actfirst + n - 1
  map_action[name] = num
end

-- Dump action names and numbers.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
-- NOTE: this removes the final byte from `actlist` (it is only written to
-- `out`, not stored back), so the list is one byte shorter afterwards.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn] or 255
  actlist[nn] = nil -- Remove last byte.
  if nn == 0 then nn = 1 end -- An empty list is emitted as the single byte 255.
  out:write("static const unsigned char ", name, "[", nn, "] = {\n")
  local s = " "
  for n,b in ipairs(actlist) do
    s = s..b..","
    if #s >= 75 then -- Wrap the generated C source lines.
      assert(out:write(s, "\n"))
      s = " "
    end
  end
  out:write(s, last, "\n};\n\n") -- Add last byte back.
end

------------------------------------------------------------------------------

-- Add byte to action list.
local function wputxb(n)
  assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
--   action  name from action_names
--   a       optional argument expression appended to the dasm_put() args
--   num     optional number of buffer positions to advance (default 1)
local function waction(action, a, num)
  wputxb(assert(map_action[action], "bad action name `"..action.."'"))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Optionally add a VREG action.
--   kind   key into map_vreg selecting which encoding field is patched
--   vreg   argument expression for the variable register (nil: do nothing)
--   psz    added to the emitted action argument byte (default 0)
--   sk     kinds whose code is below this threshold count towards shrinkage
--   defer  if true, leave the accumulated shrink count for a later VREG
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  -- Validate the kind before anything is appended to the action list and
  -- report the offending *kind* (not the register expression) on failure.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  waction("VREG", vreg)
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end

-- Add call to embedded DynASM C code.
local function wcall(func, args)
  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
end

-- Delete duplicate action list chunks. A tad slow, but so what.
local function dedupechunk(offset)
  local al, as = actlist, actstr
  local chunk = char(unpack(al, offset+1, #al))
  local orig = find(as, chunk, 1, true) -- Plain (non-pattern) search.
  if orig then
    actargs[1] = orig-1 -- Replace with original offset.
    for i=offset+1,#al do al[i] = nil end -- Kill dupe.
  else
    actstr = as..chunk
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
local function wflush(term)
  local offset = actargs[1]
  if #actlist == offset then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  dedupechunk(offset)
  wcall("put", actargs) -- Add call to dasm_put().
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped byte.
local function wputb(n)
  if n >= actfirst then waction("ESC") end -- Need to escape byte.
  wputxb(n)
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
local next_global = 10
local map_global = setmetatable({}, { __index = function(t, name)
  if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end
  local n = next_global
  if n > 246 then werror("too many global labels") end
  next_global = n + 1
  t[name] = n
  return n
end})

-- Dump global labels.
local function dumpglobals(out, lvl)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("Global labels:\n")
  for i=10,next_global-1 do
    out:write(format(" %s\n", t[i]))
  end
  out:write("\n")
end

-- Write global label enum.
local function writeglobals(out, prefix)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("enum {\n")
  for i=10,next_global-1 do
    -- Everything from the first "@" on is stripped from the enum name.
    out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
local function writeglobalnames(out, name)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("static const char *const ", name, "[] = {\n")
  for i=10,next_global-1 do
    out:write(" \"", t[i], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Extern numbers are negative and count downwards from -1.
local next_extern = -1
local map_extern = setmetatable({}, { __index = function(t, name)
  -- No restrictions on the name for now.
  local n = next_extern
  if n < -256 then werror("too many extern labels") end
  next_extern = n - 1
  t[name] = n
  return n
end})

-- Dump extern labels.
local function dumpexterns(out, lvl)
  local t = {}
  for name, n in pairs(map_extern) do t[-n] = name end
  out:write("Extern labels:\n")
  for i=1,-next_extern-1 do
    out:write(format(" %s\n", t[i]))
  end
  out:write("\n")
end

-- Write extern label names.
local function writeexternnames(out, name)
  local t = {}
  for name, n in pairs(map_extern) do t[-n] = name end
  out:write("static const char *const ", name, "[] = {\n")
  for i=1,-next_extern-1 do
    out:write(" \"", t[i], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = {} -- Ext. register name -> int. name.
local map_reg_rev = {} -- Int. register name -> ext. name.
local map_reg_num = {} -- Int. register name -> register number.
local map_reg_opsize = {} -- Int. register name -> operand size.
local map_reg_valid_base = {} -- Int. register name -> valid base register?
local map_reg_valid_index = {} -- Int. register name -> valid index register?
local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex.
local reg_list = {} -- Canonical list of int. register names.

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for _PTx macros).

local addrsize = x64 and "q" or "d" -- Size for address operands.

-- Helper functions to fill register maps.
-- Register one register class in all arch-specific maps.
--   sz     operand size code of the class ("q", "d", "w", "b", "f", "o", "y")
--   cl     external name of the variable-register form (e.g. "Rd", "xmm")
--   names  optional list of traditional names; entry n gets number n-1
local function mkrmap(sz, cl, names)
  -- Only address-sized and dword registers may serve as base/index.
  local addressable = (sz == addrsize or sz == "d")

  -- Enter one internal name into the list and the rev/num/opsize maps.
  local function addreg(iname, extname, num)
    reg_list[#reg_list+1] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = num
    map_reg_opsize[iname] = sz
    if addressable then
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  -- Variable register of this class: number -1.
  local cname = format("@%s", sz)
  map_archdef[cl] = cname
  addreg(cname, cl, -1)

  -- Traditionally named registers.
  for n, name in ipairs(names or {}) do
    local num = n - 1
    local iname = format("@%s%x", sz, num)
    map_archdef[name] = iname
    addreg(iname, name, num)
    -- Named byte registers past the 4th are explicitly marked "no rex".
    if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
  end

  -- Numbered registers: 0..15 on x64 (except the FP stack), else 0..7.
  local last = (x64 and sz ~= "f") and 15 or 7
  for i = 0, last do
    -- Numbered byte registers above 3 get a distinct "R" internal name.
    local needrex = (sz == "b" and i > 3)
    local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end

    local name
    if sz == "o" or sz == "y" then
      name = format("%s%d", cl, i)
    elseif sz == "f" then
      name = format("st%d", i)
    else
      name = format("r%d%s", i, sz == addrsize and "" or sz)
    end

    map_archdef[name] = iname
    -- A traditional name registered above keeps precedence for reverse lookup.
    if not map_reg_rev[iname] then addreg(iname, name, i) end
  end

  reg_list[#reg_list+1] = "" -- Empty entry terminates this class in the list.
end

-- Integer registers (qword, dword, word and byte sized).
if x64 then
  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
end
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})

-- The stack pointer is never a valid index register.
map_reg_valid_index[map_archdef.esp] = false
if x64 then
  map_reg_valid_index[map_archdef.rsp] = false
  -- Variable byte registers are flagged as needing REX on x64.
  map_reg_needrex[map_archdef.Rb] = true
end

-- "Ra" is the variable register of address size.
map_archdef.Ra = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- AVX registers (yword sized, but oword, qword and dword accessible).
mkrmap("y", "ymm")

-- Operand size prefixes to codes.
local map_opsize = {
  byte  = "b",
  word  = "w",
  dword = "d",
  qword = "q",
  oword = "o",
  yword = "y",
  tword = "t",
  aword = addrsize,
}

-- Operand size code to number.
local map_opsizenum = {
  b = 1,
  w = 2,
  d = 4,
  q = 8,
  o = 16,
  y = 32,
  t = 10,
}

-- Operand size code to name.
local map_opsizename = {
  b = "byte",
  w = "word",
  d = "dword",
  q = "qword",
  o = "oword",
  y = "yword",
  t = "tword",
  f = "fpword",
}

-- Valid index register scale factors.
local map_xsc = {
  ["1"] = 0,
  ["2"] = 1,
  ["4"] = 2,
  ["8"] = 3,
}

-- Condition codes. One line per code; all names on a line are synonyms.
local map_cc = {
  o = 0,
  no = 1,
  b = 2, c = 2, nae = 2,
  nb = 3, nc = 3, ae = 3,
  e = 4, z = 4,
  ne = 5, nz = 5,
  be = 6, na = 6,
  nbe = 7, a = 7,
  s = 8,
  ns = 9,
  p = 10, pe = 10,
  np = 11, po = 11,
  l = 12, nge = 12,
  nl = 13, ge = 13,
  le = 14, ng = 14,
  nle = 15, g = 15,
}


-- Reverse defines for registers.
-- Replace every "@..." internal register name in s by its external name.
-- Returns the results of gsub (the new string and the match count).
function _M.revdef(s)
  local text, count = gsub(s, "@%w+", map_reg_rev)
  return text, count
end

-- Dump register names and numbers
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for _, iname in ipairs(reg_list) do
    if iname == "" then
      -- Empty list entries separate the register classes.
      out:write("\n")
    else
      local num = map_reg_num[iname]
      local shown = num < 0 and "(variable)" or num
      out:write(format(" %-5s %-8s %s\n", map_reg_rev[iname],
                       map_opsizename[map_reg_opsize[iname]], shown))
    end
  end
end

------------------------------------------------------------------------------

-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
--   aprefix  "IMM_" or "REL_"
--   imm      label number (negative: extern label) or a pc label expression
--   num      number of buffer positions passed through to waction()
local function wputlabel(aprefix, imm, num)
  -- Non-numeric labels are pc label expressions.
  if type(imm) ~= "number" then
    waction(aprefix.."PC", imm, num)
    return
  end

  local idx = imm
  if idx < 0 then
    -- Extern label: a type byte (0 for IMM_, 1 otherwise), then the index.
    waction("EXTERN")
    wputxb(aprefix == "IMM_" and 0 or 1)
    idx = -idx - 1
  else
    -- Local or global label.
    waction(aprefix.."LG", nil, num)
  end
  wputxb(idx)
end

-- Put signed byte or arg.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  -- Store negative values as their two's complement byte.
  if n < 0 then n = n + 256 end
  wputb(n)
end

-- Put unsigned byte or arg.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end

-- Put unsigned word or arg.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  -- Low byte first.
  wputb(band(n, 255))
  wputb(shr(n, 8))
end

-- Put signed or unsigned dword or arg.
-- Emits a 32 bit value. A number is written as four bytes, low byte first;
-- a table is treated as a label reference ({ label }); anything else is
-- passed on as the argument expression of an IMM_D action.
local function wputdarg(n)
  local tn = type(n)
  if tn == "number" then
    wputb(band(n, 255))
    wputb(band(shr(n, 8), 255))
    wputb(band(shr(n, 16), 255))
    wputb(shr(n, 24))
  elseif tn == "table" then
    wputlabel("IMM_", n[1], 1)
  else
    waction("IMM_D", n)
  end
end

-- Put operand-size dependent number or arg (defaults to dword).
-- Note that "q" is emitted as a dword, too. "s" selects a signed byte.
local function wputszarg(sz, n)
  if not sz or sz == "d" or sz == "q" then wputdarg(n)
  elseif sz == "w" then wputwarg(n)
  elseif sz == "b" then wputbarg(n)
  elseif sz == "s" then wputsbarg(n)
  else werror("bad operand size") end
end

-- Put multi-byte opcode with operand-size dependent modifications.
--   sz      operand size code
--   op      opcode as a number (bytes are emitted most significant first)
--   rex     REX bits (low 4 bits are emitted as 0x40 + bits)
--   vex     VEX description table (fields m, p, l, v are read) or nil
--   vregr, vregxb  truthy if variable registers are involved
-- Returns psz, sk. Both are handed on to wvreg() by the callers visible in
-- this file: psz is added to the VREG action argument byte and sk is the
-- shrink threshold compared against the VREG kind code.
-- NOTE(review): psz looks like the distance back to the prefix byte that
-- the C encoder patches -- confirm against the DASM_VREG case.
local function wputop(sz, op, rex, vex, vregr, vregxb)
  local psz, sk = 0, nil
  if vex then
    local tail
    -- Short form (0xc5) is only tried for m == 1 with rex bits 8, 2 and 1
    -- clear (presumably W, X and B).
    if vex.m == 1 and band(rex, 11) == 0 then
      if x64 and vregxb then
        -- Emit the long form and let the encoder shrink it later.
        sk = map_vreg["modrm.reg"]
      else
        wputb(0xc5)
        tail = shl(bxor(band(rex, 4), 4), 5) -- Inverted rex bit 4 -> bit 7.
        psz = 3
      end
    end
    if not tail then
      wputb(0xc4)
      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) -- Inverted low rex bits.
      tail = shl(band(rex, 8), 4) -- rex bit 8 -> bit 7 (not inverted).
      psz = 4
    end
    local reg, vreg = 0, nil
    if vex.v then
      reg = vex.v.reg
      if not reg then werror("bad vex operand") end
      if reg < 0 then reg = 0; vreg = vex.v.vreg end -- Variable register.
    end
    if sz == "y" or vex.l then tail = tail + 4 end
    wputb(tail + shl(bxor(reg, 15), 3) + vex.p) -- Register is stored inverted.
    wvreg("vex.v", vreg)
    rex = 0 -- Already folded into the VEX prefix.
    if op >= 256 then werror("bad vex opcode") end
  else
    if rex ~= 0 then
      if not x64 then werror("bad operand size") end
    elseif (vregr or vregxb) and x64 then
      -- 0x10 has none of the low 4 bits set, so a bare 0x40 prefix byte is
      -- emitted below for the variable registers to modify.
      rex = 0x10
      sk = map_vreg["vex.v"]
    end
  end
  local r
  if sz == "w" then wputb(102) end -- 102 == 0x66 (see o16_0 in map_op).
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      -- For 0F 3A xx and 0F 38 xx the REX byte goes before the 0F escape.
      local opc3 = band(op, 0xffff00)
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
        wputb(64 + band(rex, 15)); rex = 0; psz = 2
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
  end
  if op >= 256 then
    local b = shr(op, 8)
    -- Same for the two-byte 0F xx opcodes (15 == 0x0f).
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    wputb(b); op = band(op, 255); psz = psz + 1
  end
  -- Otherwise the REX byte directly precedes the final opcode byte.
  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
  if sz == "b" then op = op - 1 end -- Byte-sized variant: opcode - 1.
  wputb(op)
  return psz, sk
end

-- Put ModRM or SIB formatted byte.
-- Layout: m in bits 7-6, low 3 bits of s in bits 5-3, low 3 bits of rm in
-- bits 2-0. The vs and vrm parameters are not used by the body.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
end

-- Put ModRM/SIB plus optional displacement.
--   t       parsed operand (fields mode, reg, xreg, vreg, vxreg, xsc, disp)
--   imark   pattern character of a following immediate; "I" requests a MARK
--   s       value for the spare (reg) field; negative is treated as 0
--   vsreg   variable register expression for the spare field, if any
--   psz,sk  values returned by wputop(), handed on to wvreg()
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  -- Negative register numbers denote variable registers: encode 0 and let
  -- the VREG action fill in the real number.
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.r", vreg, psz+1, sk)
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
      wputmodrm(t.xsc, xreg, 5)
      wvreg("sib.index", vxreg, psz+2, sk)
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
        wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
        wputmodrm(0, 4, 5)
      else
        -- On x64 this path is only reached for label displacements (tables).
        riprel = x64
        wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
      end
    end
    if riprel then -- Emit rip-relative displacement.
      -- NOTE(review): imark is used as a Lua pattern here and must be a
      -- string; a nil imark would raise in match() -- confirm callers.
      if match("UWSiI", imark) then
        werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  -- m is the ModRM mode: 0 = no disp, 1 = disp8, 2 = disp32.
  -- nil means the size is decided at runtime via a DISP action.
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    wvreg("sib.index", vxreg, psz+2, sk, vreg)
    wvreg("sib.base", vreg, psz+2, sk)
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.m", vreg, psz+1, sk)
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end

------------------------------------------------------------------------------

-- Return human-readable operand mode string.
-- Each operand contributes the first char of its mode plus its opsize
-- ("?" if it has none), e.g. "mov rd,x?".
local function opmodestr(op, args)
  local m = {}
  for i=1,#args do
    local a = args[i]
    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
  end
  return op.." "..concat(m, ",")
end

-- Convert number to valid integer or nil.
-- Accepts the union of the signed and unsigned 32 bit ranges and reports
-- anything else numeric via werror(). Returns nothing for non-numbers.
local function toint(expr)
  local n = tonumber(expr)
  if n then
    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
      werror("bad integer number `"..expr.."'")
    end
    return n
  end
end

-- Parse immediate expression.
-- Classify an immediate expression.
-- Returns the operand mode string plus the value: a C expression, a label
-- number or a pc label expression.
local function immexpr(expr)
  local lead1, lead2 = sub(expr, 1, 1), sub(expr, 1, 2)

  if lead1 == "&" then
    -- &expr (pointer)
    return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
  elseif lead2 == "=>" then
    -- =>expr (pc label reference)
    return "iJ", sub(expr, 3)
  elseif lead2 == "->" then
    -- ->name (global label reference)
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then
    -- Fwd: 247-255, Bkwd: 1-9.
    local bias = (dir == ">") and 246 or 0
    return "iJ", lnum + bias
  end

  -- extern name (extern label reference)
  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end

-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns a number, a one-element table holding a label reference, or an
-- expression string to be evaluated by the C compiler.
local function dispexpr(expr)
  -- Empty or plain numeric displacement.
  if expr == "" then return 0 end
  local num = toint(expr)
  if num then return num end

  -- Everything else needs an explicit sign.
  local sign, rest = match(expr, "^([+-])%s*(.+)$")
  if not sign then
    werror("bad displacement expression `"..expr.."'")
  elseif sign == "+" then
    expr = rest
  end
  local negate = (sign == "-")

  -- +-opsize*num
  local szname, count = match(rest, "^(%w+)%s*%*%s*(.+)$")
  local szcode, imm = map_opsize[szname], toint(count)
  if szcode and imm then
    if negate then imm = -imm end
    return imm*map_opsizenum[szcode]
  end

  -- +label
  local mode, iexpr = immexpr(rest)
  if mode == "iJ" then
    if negate then werror("cannot invert label reference") end
    return { iexpr }
  end

  return expr -- Need to return original signed expression.
end

-- Parse register or type expression.
-- Resolves a register name or a type name (optionally "type:@reg").
-- Returns nothing for a nil expr. For a known type it returns the internal
-- register name, its number and the type table. Otherwise it returns expr
-- and its register number (nil if expr is not a register).
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg -- An explicit override wins over the default.
    local rnum = map_reg_num[reg]
    if not rnum then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    if not map_reg_valid_base[reg] then
      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
    end
    return reg, rnum, tp
  end
  return expr, map_reg_num[expr]
end

-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-- The repeat ... until true loop runs exactly once; "break" is used as an
-- early exit to the common "return t" at the end.
local function parseoperand(param)
  local t = {}

  -- Strip a leading operand size override (e.g. "dword"), if it is one.
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  local br = match(expr, "^%[%s*(.-)%s*%]$") -- Contents of [...], if any.
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
        t.mode = x64 and "xm" or "xmO"
        break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
        -- [expr]
        t.mode = x64 and "xm" or "xmO"
        t.disp = dispexpr("+"..br)
        break
      end

      -- Register number -1 is a variable register: "(expr)" must follow.
      if t.reg == -1 then
        t.vreg, tailr = match(tailr, "^(%b())(.*)$")
        if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        if not map_reg_valid_index[reg] then
          werror("bad index register `"..map_reg_rev[reg].."'")
        end
        -- The register parsed so far is the index, not the base.
        t.xsc = map_xsc[xsc]
        t.xreg = t.reg
        t.vxreg = t.vreg
        t.reg = nil
        t.vreg = nil
        t.disp = dispexpr(tailsc)
        break
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
        -- [reg+-expr]
        t.disp = dispexpr(tailr)
        break
      end
      if not map_reg_valid_index[xreg] then
        werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
        t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
        if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        t.xsc = map_xsc[xsc]
        tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
        imm = toint(sub(expr, 2))
        if imm then
          -- "opsize*num": the size word was a multiplier, not an override.
          imm = imm * map_opsizenum[t.opsize]
          t.opsize = nil
        end
      end
      if imm then
        if t.opsize then werror("bad operand size override") end
        local m = "i"
        if imm == 1 then m = m.."1" end
        -- Map 0xffffff80..0xffffffff to -128..-1 so it qualifies as "S".
        if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
        if imm >= -128 and imm <= 127 then m = m.."S" end
        t.imm = imm
        t.mode = m
        break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
        if t.reg == -1 then
          t.vreg, tailr = match(tailr, "^(%b())(.*)$")
          if not t.vreg then werror("bad variable register expression") end
        end
        -- reg
        if tailr == "" then
          if t.opsize then werror("bad operand size override") end
          t.opsize = map_reg_opsize[reg]
          if t.opsize == "f" then
            t.mode = t.reg == 0 and "fF" or "f"
          else
            -- Register 4 of the word class (and of the dword class on x64).
            if reg == "@w4" or (x64 and reg == "@d4") then
              wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
            end
            -- "R" marks register 0, "C" marks byte register 1 (see the
            -- mode string description below: eax/ax/al and cl).
            t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
          end
          t.needrex = map_reg_needrex[reg]
          break
        end

        -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
        if not tp then werror("bad operand `"..param.."'") end
        t.mode = "xm"
        t.disp = format(tp.ctypefmt, tailr)
      else
        t.mode, t.imm = immexpr(expr)
        -- Jump targets are always address sized.
        if sub(t.mode, -1) == "J" then
          if t.opsize and t.opsize ~= addrsize then
            werror("bad operand size override")
          end
          t.opsize = addrsize
        end
      end
    end
  until true
  return t
end

------------------------------------------------------------------------------
-- x86 Template String Description
-- ===============================
--
-- Each template string is a list of [match:]pattern pairs,
-- separated by "|". The first match wins. No match means a
-- bad or unsupported combination of operand modes or sizes.
--
-- The match part and the ":" are omitted if the operation has
-- no operands. Otherwise the first N characters are matched
-- against the mode strings of each of the N operands.
--
-- The mode string for each operand type is (see parseoperand()):
-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
-- FP register: "f", +"F" for st0
-- Index operand: "xm", +"O" for [disp] (pure offset)
-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
-- +"I" for arg, +"P" for pointer
-- Any: +"J" for valid jump targets
--
-- So a match character "m" (mixed) matches both an integer register
-- and an index operand (to be encoded with the ModRM/SIB scheme).
-- But "r" matches only a register and "x" only an index operand
-- (e.g. for FP memory access operations).
--
-- The operand size match string starts right after the mode match
-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
-- The effective data size of the operation is matched against this list.
--
-- If only the regular "b", "w", "d", "q", "t" operand sizes are
-- present, then all operands must be the same size. Unspecified sizes
-- are ignored, but at least one operand must have a size or the pattern
-- won't match (use the "byte", "word", "dword", "qword", "tword"
-- operand size overrides. E.g.: mov dword [eax], 1).
+-- +-- If the list has a "1" or "2" prefix, the operand size is taken +-- from the respective operand and any other operand sizes are ignored. +-- If the list contains only ".", all operand sizes are ignored. +-- If the list has a "/" prefix, the concatenated (mixed) operand sizes +-- are compared to the match. +-- +-- E.g. "rrdw" matches for either two dword registers or two word +-- registers. "Fx2dq" matches an st0 operand plus an index operand +-- pointing to a dword (float) or qword (double). +-- +-- Every character after the ":" is part of the pattern string: +-- Hex chars are accumulated to form the opcode (left to right). +-- "n" disables the standard opcode mods +-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") +-- "X" Force REX.W. +-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. +-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. +-- The spare 3 bits are either filled with the last hex digit or +-- the result from a previous "r"/"R". The opcode is restored. +-- "u" Use VEX encoding, vvvv unused. +-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is +-- removed from the list used by future characters). +-- "w" Use VEX encoding, vvvv from 3rd operand. +-- "L" Force VEX.L +-- +-- All of the following characters force a flush of the opcode: +-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. +-- "s" stores a 4 bit immediate from the last register operand, +-- followed by 4 zero bits. +-- "S" stores a signed 8 bit immediate from the last operand. +-- "U" stores an unsigned 8 bit immediate from the last operand. +-- "W" stores an unsigned 16 bit immediate from the last operand. +-- "i" stores an operand sized immediate from the last operand. +-- "I" ditto, but generates an action code to optionally modify +-- the opcode (+2) for a signed 8 bit immediate. +-- "J" generates one of the REL action codes from the last operand. 
--
------------------------------------------------------------------------------

-- Template strings for x86 instructions. Ordered by first opcode byte.
-- Unimplemented opcodes (deliberate omissions) are marked with *.
--
-- Keys have the form "<mnemonic>_<operand count>" (e.g. imul_2 / imul_3).
-- Each value is a list of "match:pattern" alternatives separated by "|";
-- an alternative with an empty pattern reuses the pattern of the previous
-- alternative. Entries written as `x64 and "..."` evaluate to a non-string
-- (false/nil) when x64 is not set.
--
-- Review fixes relative to the previous revision of this table:
--   * insertps_3 used opcode 66 0F 3A 41 (that is dppd); the SSE4.1
--     INSERTPS opcode is 66 0F 3A 21, matching vinsertps_4 below.
--   * rsqrtss_2 gained an "rx/od:" alternative so that a dword memory
--     operand is accepted, like rcpss_2. The old alternative is kept first,
--     so every operand combination accepted before is still accepted.
--   * The comment for opcode D3 said "mb,cl"; D3 is the mdw form.
local map_op = {
  -- 00-05: add...
  -- 06: *push es
  -- 07: *pop es
  -- 08-0D: or...
  -- 0E: *push cs
  -- 0F: two byte opcode prefix
  -- 10-15: adc...
  -- 16: *push ss
  -- 17: *pop ss
  -- 18-1D: sbb...
  -- 1E: *push ds
  -- 1F: *pop ds
  -- 20-25: and...
  es_0 = "26",
  -- 27: *daa
  -- 28-2D: sub...
  cs_0 = "2E",
  -- 2F: *das
  -- 30-35: xor...
  ss_0 = "36",
  -- 37: *aaa
  -- 38-3D: cmp...
  ds_0 = "3E",
  -- 3F: *aas
  inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
            "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  -- 60: *pusha, *pushad, *pushaw
  -- 61: *popa, *popad, *popaw
  -- 62: *bound rdw,x
  -- 63: x86: *arpl mw,rw
  movsxd_2 = x64 and "rm/qd:63rM",
  fs_0 = "64",
  gs_0 = "65",
  o16_0 = "66",
  a16_0 = not x64 and "67" or nil,
  a32_0 = x64 and "67",
  -- 68: push idw
  -- 69: imul rdw,mdw,idw
  -- 6A: push ib
  -- 6B: imul rdw,mdw,S
  -- 6C: *insb
  -- 6D: *insd, *insw
  -- 6E: *outsb
  -- 6F: *outsd, *outsw
  -- 70-7F: jcc lb
  -- 80: add... mb,i
  -- 81: add... mdw,i
  -- 82: *undefined
  -- 83: add... mdw,S
  test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  -- 86: xchg rb,mb
  -- 87: xchg rdw,mdw
  -- 88: mov mb,r
  -- 89: mov mdw,r
  -- 8A: mov r,mb
  -- 8B: mov r,mdw
  -- 8C: *mov mdw,seg
  lea_2 = "rx1dq:8DrM",
  -- 8E: *mov seg,mdw
  -- 8F: pop mdw
  nop_0 = "90",
  xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  cbw_0 = "6698",
  cwde_0 = "98",
  cdqe_0 = "4898",
  cwd_0 = "6699",
  cdq_0 = "99",
  cqo_0 = "4899",
  -- 9A: *call iw:idw
  wait_0 = "9B",
  fwait_0 = "9B",
  pushf_0 = "9C",
  pushfd_0 = not x64 and "9C",
  pushfq_0 = x64 and "9C",
  popf_0 = "9D",
  popfd_0 = not x64 and "9D",
  popfq_0 = x64 and "9D",
  sahf_0 = "9E",
  lahf_0 = "9F",
  mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  movsb_0 = "A4",
  movsw_0 = "66A5",
  movsd_0 = "A5",
  cmpsb_0 = "A6",
  cmpsw_0 = "66A7",
  cmpsd_0 = "A7",
  -- A8: test Rb,i
  -- A9: test Rdw,i
  stosb_0 = "AA",
  stosw_0 = "66AB",
  stosd_0 = "AB",
  lodsb_0 = "AC",
  lodsw_0 = "66AD",
  lodsd_0 = "AD",
  scasb_0 = "AE",
  scasw_0 = "66AF",
  scasd_0 = "AF",
  -- B0-B7: mov rb,i
  -- B8-BF: mov rdw,i
  -- C0: rol... mb,i
  -- C1: rol... mdw,i
  ret_1 = "i.:nC2W",
  ret_0 = "C3",
  -- C4: *les rdw,mq
  -- C5: *lds rdw,mq
  -- C6: mov mb,i
  -- C7: mov mdw,i
  -- C8: *enter iw,ib
  leave_0 = "C9",
  -- CA: *retf iw
  -- CB: *retf
  int3_0 = "CC",
  int_1 = "i.:nCDU",
  into_0 = "CE",
  -- CF: *iret
  -- D0: rol... mb,1
  -- D1: rol... mdw,1
  -- D2: rol... mb,cl
  -- D3: rol... mdw,cl
  -- D4: *aam ib
  -- D5: *aad ib
  -- D6: *salc
  -- D7: *xlat
  -- D8-DF: floating point ops
  -- E0: *loopne
  -- E1: *loope
  -- E2: *loop
  -- E3: *jcxz, *jecxz
  -- E4: *in Rb,ib
  -- E5: *in Rdw,ib
  -- E6: *out ib,Rb
  -- E7: *out ib,Rdw
  call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  -- EA: *jmp iw:idw
  -- EB: jmp ib
  -- EC: *in Rb,dx
  -- ED: *in Rdw,dx
  -- EE: *out dx,Rb
  -- EF: *out dx,Rdw
  lock_0 = "F0",
  int1_0 = "F1",
  repne_0 = "F2",
  repnz_0 = "F2",
  rep_0 = "F3",
  repe_0 = "F3",
  repz_0 = "F3",
  -- F4: *hlt
  cmc_0 = "F5",
  -- F6: test... mb,i; div... mb
  -- F7: test... mdw,i; div... mdw
  clc_0 = "F8",
  stc_0 = "F9",
  -- FA: *cli
  cld_0 = "FC",
  std_0 = "FD",
  -- FE: inc... mb
  -- FF: inc... mdw

  -- misc ops
  not_1 = "m:F72m",
  neg_1 = "m:F73m",
  mul_1 = "m:F74m",
  imul_1 = "m:F75m",
  div_1 = "m:F76m",
  idiv_1 = "m:F77m",

  imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",

  movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",

  bswap_1 = "rqd:0FC8r",
  bsf_2 = "rmqdw:0FBCrM",
  bsr_2 = "rmqdw:0FBDrM",
  bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
  btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
  btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
  bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",

  shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
  shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",

  rdtsc_0 = "0F31", -- P1+
  rdpmc_0 = "0F33", -- P6+
  cpuid_0 = "0FA2", -- P1+

  -- floating point ops
  fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
  fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",

  fpop_0 = "DDD8", -- Alias for fstp st0.

  fist_1 = "xw:nDF2m|xd:DB2m",
  fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
  fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",

  fxch_0 = "D9C9",
  fxch_1 = "ff:D9C8r",
  fxch_2 = "fFf:D9C8r|Fff:D9C8R",

  fucom_1 = "ff:DDE0r",
  fucom_2 = "Fff:DDE0R",
  fucomp_1 = "ff:DDE8r",
  fucomp_2 = "Fff:DDE8R",
  fucomi_1 = "ff:DBE8r", -- P6+
  fucomi_2 = "Fff:DBE8R", -- P6+
  fucomip_1 = "ff:DFE8r", -- P6+
  fucomip_2 = "Fff:DFE8R", -- P6+
  fcomi_1 = "ff:DBF0r", -- P6+
  fcomi_2 = "Fff:DBF0R", -- P6+
  fcomip_1 = "ff:DFF0r", -- P6+
  fcomip_2 = "Fff:DFF0R", -- P6+
  fucompp_0 = "DAE9",
  fcompp_0 = "DED9",

  fldenv_1 = "x.:D94m",
  fnstenv_1 = "x.:D96m",
  fstenv_1 = "x.:9BD96m",
  fldcw_1 = "xw:nD95m",
  fstcw_1 = "xw:n9BD97m",
  fnstcw_1 = "xw:nD97m",
  fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
  fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
  fclex_0 = "9BDBE2",
  fnclex_0 = "DBE2",

  fnop_0 = "D9D0",
  -- D9D1-D9DF: unassigned

  fchs_0 = "D9E0",
  fabs_0 = "D9E1",
  -- D9E2: unassigned
  -- D9E3: unassigned
  ftst_0 = "D9E4",
  fxam_0 = "D9E5",
  -- D9E6: unassigned
  -- D9E7: unassigned
  fld1_0 = "D9E8",
  fldl2t_0 = "D9E9",
  fldl2e_0 = "D9EA",
  fldpi_0 = "D9EB",
  fldlg2_0 = "D9EC",
  fldln2_0 = "D9ED",
  fldz_0 = "D9EE",
  -- D9EF: unassigned

  f2xm1_0 = "D9F0",
  fyl2x_0 = "D9F1",
  fptan_0 = "D9F2",
  fpatan_0 = "D9F3",
  fxtract_0 = "D9F4",
  fprem1_0 = "D9F5",
  fdecstp_0 = "D9F6",
  fincstp_0 = "D9F7",
  fprem_0 = "D9F8",
  fyl2xp1_0 = "D9F9",
  fsqrt_0 = "D9FA",
  fsincos_0 = "D9FB",
  frndint_0 = "D9FC",
  fscale_0 = "D9FD",
  fsin_0 = "D9FE",
  fcos_0 = "D9FF",

  -- SSE, SSE2
  andnpd_2 = "rmo:660F55rM",
  andnps_2 = "rmo:0F55rM",
  andpd_2 = "rmo:660F54rM",
  andps_2 = "rmo:0F54rM",
  clflush_1 = "x.:0FAE7m",
  cmppd_3 = "rmio:660FC2rMU",
  cmpps_3 = "rmio:0FC2rMU",
  cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
  cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
  comisd_2 = "rro:660F2FrM|rx/oq:",
  comiss_2 = "rro:0F2FrM|rx/od:",
  cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
  cvtdq2ps_2 = "rmo:0F5BrM",
  cvtpd2dq_2 = "rmo:F20FE6rM",
  cvtpd2ps_2 = "rmo:660F5ArM",
  cvtpi2pd_2 = "rx/oq:660F2ArM",
  cvtpi2ps_2 = "rx/oq:0F2ArM",
  cvtps2dq_2 = "rmo:660F5BrM",
  cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
  cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
  cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
  cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
  cvttpd2dq_2 = "rmo:660FE6rM",
  cvttps2dq_2 = "rmo:F30F5BrM",
  cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  fxsave_1 = "x.:0FAE0m",
  fxrstor_1 = "x.:0FAE1m",
  ldmxcsr_1 = "xd:0FAE2m",
  lfence_0 = "0FAEE8",
  maskmovdqu_2 = "rro:660FF7rM",
  mfence_0 = "0FAEF0",
  movapd_2 = "rmo:660F28rM|mro:660F29Rm",
  movaps_2 = "rmo:0F28rM|mro:0F29Rm",
  movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
  movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
  movhlps_2 = "rro:0F12rM",
  movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
  movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
  movlhps_2 = "rro:0F16rM",
  movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
  movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
  movmskpd_2 = "rr/do:660F50rM",
  movmskps_2 = "rr/do:0F50rM",
  movntdq_2 = "xro:660FE7Rm",
  movnti_2 = "xrqd:0FC3Rm",
  movntpd_2 = "xro:660F2BRm",
  movntps_2 = "xro:0F2BRm",
  movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  movupd_2 = "rmo:660F10rM|mro:660F11Rm",
  movups_2 = "rmo:0F10rM|mro:0F11Rm",
  orpd_2 = "rmo:660F56rM",
  orps_2 = "rmo:0F56rM",
  pause_0 = "F390",
  pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
  pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
  pmovmskb_2 = "rr/do:660FD7rM",
  prefetchnta_1 = "xb:n0F180m",
  prefetcht0_1 = "xb:n0F181m",
  prefetcht1_1 = "xb:n0F182m",
  prefetcht2_1 = "xb:n0F183m",
  pshufd_3 = "rmio:660F70rMU",
  pshufhw_3 = "rmio:F30F70rMU",
  pshuflw_3 = "rmio:F20F70rMU",
  pslld_2 = "rmo:660FF2rM|rio:660F726mU",
  pslldq_2 = "rio:660F737mU",
  psllq_2 = "rmo:660FF3rM|rio:660F736mU",
  psllw_2 = "rmo:660FF1rM|rio:660F716mU",
  psrad_2 = "rmo:660FE2rM|rio:660F724mU",
  psraw_2 = "rmo:660FE1rM|rio:660F714mU",
  psrld_2 = "rmo:660FD2rM|rio:660F722mU",
  psrldq_2 = "rio:660F733mU",
  psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
  psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
  rcpps_2 = "rmo:0F53rM",
  rcpss_2 = "rro:F30F53rM|rx/od:",
  rsqrtps_2 = "rmo:0F52rM",
  -- Scalar op: also accept a dword memory operand (same as rcpss_2).
  rsqrtss_2 = "rmo:F30F52rM|rx/od:",
  sfence_0 = "0FAEF8",
  shufpd_3 = "rmio:660FC6rMU",
  shufps_3 = "rmio:0FC6rMU",
  stmxcsr_1 = "xd:0FAE3m",
  ucomisd_2 = "rro:660F2ErM|rx/oq:",
  ucomiss_2 = "rro:0F2ErM|rx/od:",
  unpckhpd_2 = "rmo:660F15rM",
  unpckhps_2 = "rmo:0F15rM",
  unpcklpd_2 = "rmo:660F14rM",
  unpcklps_2 = "rmo:0F14rM",
  xorpd_2 = "rmo:660F57rM",
  xorps_2 = "rmo:0F57rM",

  -- SSE3 ops
  fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
  addsubpd_2 = "rmo:660FD0rM",
  addsubps_2 = "rmo:F20FD0rM",
  haddpd_2 = "rmo:660F7CrM",
  haddps_2 = "rmo:F20F7CrM",
  hsubpd_2 = "rmo:660F7DrM",
  hsubps_2 = "rmo:F20F7DrM",
  lddqu_2 = "rxo:F20FF0rM",
  movddup_2 = "rmo:F20F12rM",
  movshdup_2 = "rmo:F30F16rM",
  movsldup_2 = "rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 = "rmo:660F381CrM",
  pabsd_2 = "rmo:660F381ErM",
  pabsw_2 = "rmo:660F381DrM",
  palignr_3 = "rmio:660F3A0FrMU",
  phaddd_2 = "rmo:660F3802rM",
  phaddsw_2 = "rmo:660F3803rM",
  phaddw_2 = "rmo:660F3801rM",
  phsubd_2 = "rmo:660F3806rM",
  phsubsw_2 = "rmo:660F3807rM",
  phsubw_2 = "rmo:660F3805rM",
  pmaddubsw_2 = "rmo:660F3804rM",
  pmulhrsw_2 = "rmo:660F380BrM",
  pshufb_2 = "rmo:660F3800rM",
  psignb_2 = "rmo:660F3808rM",
  psignd_2 = "rmo:660F380ArM",
  psignw_2 = "rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 = "rmio:660F3A0DrMU",
  blendps_3 = "rmio:660F3A0CrMU",
  blendvpd_3 = "rmRo:660F3815rM",
  blendvps_3 = "rmRo:660F3814rM",
  dppd_3 = "rmio:660F3A41rMU",
  dpps_3 = "rmio:660F3A40rMU",
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  -- INSERTPS is 66 0F 3A 21 /r ib (41 is dppd, see above).
  insertps_3 = "rrio:660F3A21rMU|rxi/od:",
  movntdqa_2 = "rxo:660F382ArM",
  mpsadbw_3 = "rmio:660F3A42rMU",
  packusdw_2 = "rmo:660F382BrM",
  pblendvb_3 = "rmRo:660F3810rM",
  pblendw_3 = "rmio:660F3A0ErMU",
  pcmpeqq_2 = "rmo:660F3829rM",
  pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 = "mri/do:660F3A16RmU",
  pextrq_3 = "mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 = "rmi/od:660F3A22rMU",
  pinsrq_3 = "rmi/oq:660F3A22rXMU",
  pmaxsb_2 = "rmo:660F383CrM",
  pmaxsd_2 = "rmo:660F383DrM",
  pmaxud_2 = "rmo:660F383FrM",
  pmaxuw_2 = "rmo:660F383ErM",
  pminsb_2 = "rmo:660F3838rM",
  pminsd_2 = "rmo:660F3839rM",
  pminud_2 = "rmo:660F383BrM",
  pminuw_2 = "rmo:660F383ArM",
  pmovsxbd_2 = "rro:660F3821rM|rx/od:",
  pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
  pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
  pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
  pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
  pmovsxwq_2 = "rro:660F3824rM|rx/od:",
  pmovzxbd_2 = "rro:660F3831rM|rx/od:",
  pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
  pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
  pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
  pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
  pmovzxwq_2 = "rro:660F3834rM|rx/od:",
  pmuldq_2 = "rmo:660F3828rM",
  pmulld_2 = "rmo:660F3840rM",
  ptest_2 = "rmo:660F3817rM",
  roundpd_3 = "rmio:660F3A09rMU",
  roundps_3 = "rmio:660F3A08rMU",
  roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
  roundss_3 = "rrio:660F3A0ArMU|rxi/od:",

  -- SSE4.2 ops
  crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  pcmpestri_3 = "rmio:660F3A61rMU",
  pcmpestrm_3 = "rmio:660F3A60rMU",
  pcmpgtq_2 = "rmo:660F3837rM",
  pcmpistri_3 = "rmio:660F3A63rMU",
  pcmpistrm_3 = "rmio:660F3A62rMU",
  popcnt_2 = "rmqdw:F30FB8rM",

  -- SSE4a
  extrq_2 = "rro:660F79rM",
  extrq_3 = "riio:660F780mUU",
  insertq_2 = "rro:F20F79rM",
  insertq_4 = "rriio:F20F78rMUU",
  lzcnt_2 = "rmqdw:F30FBDrM",
  movntsd_2 = "xr/qo:nF20F2BRm",
  movntss_2 = "xr/do:F30F2BRm",
  -- popcnt is also in SSE4.2

  -- AES-NI
  aesdec_2 = "rmo:660F38DErM",
  aesdeclast_2 = "rmo:660F38DFrM",
  aesenc_2 = "rmo:660F38DCrM",
  aesenclast_2 = "rmo:660F38DDrM",
  aesimc_2 = "rmo:660F38DBrM",
  aeskeygenassist_3 = "rmio:660F3ADFrMU",
  pclmulqdq_3 = "rmio:660F3A44rMU",

  -- AVX FP ops
  vaddsubpd_3 = "rrmoy:660FVD0rM",
  vaddsubps_3 = "rrmoy:F20FVD0rM",
  vandpd_3 = "rrmoy:660FV54rM",
  vandps_3 = "rrmoy:0FV54rM",
  vandnpd_3 = "rrmoy:660FV55rM",
  vandnps_3 = "rrmoy:0FV55rM",
  vblendpd_4 = "rrmioy:660F3AV0DrMU",
  vblendps_4 = "rrmioy:660F3AV0CrMU",
  vblendvpd_4 = "rrmroy:660F3AV4BrMs",
  vblendvps_4 = "rrmroy:660F3AV4ArMs",
  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
  vcmppd_4 = "rrmioy:660FVC2rMU",
  vcmpps_4 = "rrmioy:0FVC2rMU",
  vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
  vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
  vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
  vcomiss_2 = "rro:0Fu2FrM|rx/od:",
  vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
  vcvtdq2ps_2 = "rmoy:0Fu5BrM",
  vcvtpd2dq_2 = "rmoy:F20FuE6rM",
  vcvtpd2ps_2 = "rmoy:660Fu5ArM",
  vcvtps2dq_2 = "rmoy:660Fu5BrM",
  vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
  vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
  vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
  vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
  vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
  vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
  vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
  vdppd_4 = "rrmio:660F3AV41rMU",
  vdpps_4 = "rrmioy:660F3AV40rMU",
  vextractf128_3 = "mri/oy:660F3AuL19RmU",
  vextractps_3 = "mri/do:660F3Au17RmU",
  vhaddpd_3 = "rrmoy:660FV7CrM",
  vhaddps_3 = "rrmoy:F20FV7CrM",
  vhsubpd_3 = "rrmoy:660FV7DrM",
  vhsubps_3 = "rrmoy:F20FV7DrM",
  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
  vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
  vldmxcsr_1 = "xd:0FuAE2m",
  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
  vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
  vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
  vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
  vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
  vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
  vmovhlps_3 = "rrro:0FV12rM",
  vmovhpd_2 = "xr/qo:660Fu17Rm",
  vmovhpd_3 = "rrx/ooq:660FV16rM",
  vmovhps_2 = "xr/qo:0Fu17Rm",
  vmovhps_3 = "rrx/ooq:0FV16rM",
  vmovlhps_3 = "rrro:0FV16rM",
  vmovlpd_2 = "xr/qo:660Fu13Rm",
  vmovlpd_3 = "rrx/ooq:660FV12rM",
  vmovlps_2 = "xr/qo:0Fu13Rm",
  vmovlps_3 = "rrx/ooq:0FV12rM",
  vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
  vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
  vmovntpd_2 = "xroy:660Fu2BRm",
  vmovntps_2 = "xroy:0Fu2BRm",
  vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
  vmovsd_3 = "rrro:F20FV10rM",
  vmovshdup_2 = "rmoy:F30Fu16rM",
  vmovsldup_2 = "rmoy:F30Fu12rM",
  vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
  vmovss_3 = "rrro:F30FV10rM",
  vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
  vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
  vorpd_3 = "rrmoy:660FV56rM",
  vorps_3 = "rrmoy:0FV56rM",
  vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
  vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
  vperm2f128_4 = "rrmiy:660F3AV06rMU",
  vptestpd_2 = "rmoy:660F38u0FrM",
  vptestps_2 = "rmoy:660F38u0ErM",
  vrcpps_2 = "rmoy:0Fu53rM",
  vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
  vrsqrtps_2 = "rmoy:0Fu52rM",
  vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
  vroundpd_3 = "rmioy:660F3Au09rMU",
  vroundps_3 = "rmioy:660F3Au08rMU",
  vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
  vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
  vshufpd_4 = "rrmioy:660FVC6rMU",
  vshufps_4 = "rrmioy:0FVC6rMU",
  vsqrtps_2 = "rmoy:0Fu51rM",
  vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
  vsqrtpd_2 = "rmoy:660Fu51rM",
  vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
  vstmxcsr_1 = "xd:0FuAE3m",
  vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
  vucomiss_2 = "rro:0Fu2ErM|rx/od:",
  vunpckhpd_3 = "rrmoy:660FV15rM",
  vunpckhps_3 = "rrmoy:0FV15rM",
  vunpcklpd_3 = "rrmoy:660FV14rM",
  vunpcklps_3 = "rrmoy:0FV14rM",
  vxorpd_3 = "rrmoy:660FV57rM",
  vxorps_3 = "rrmoy:0FV57rM",
  vzeroall_0 = "0FuL77",
  vzeroupper_0 = "0Fu77",

  -- AVX2 FP ops
  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
  -- *vgather* (!vsib)
  vpermpd_3 = "rmiy:660F3AuX01rMU",
  vpermps_3 = "rrmy:660F38V16rM",

  -- AVX, AVX2 integer ops
  -- In general, xmm requires AVX, ymm requires AVX2.
  vaesdec_3 = "rrmo:660F38VDErM",
  vaesdeclast_3 = "rrmo:660F38VDFrM",
  vaesenc_3 = "rrmo:660F38VDCrM",
  vaesenclast_3 = "rrmo:660F38VDDrM",
  vaesimc_2 = "rmo:660F38uDBrM",
  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
  vlddqu_2 = "rxoy:F20FuF0rM",
  vmaskmovdqu_2 = "rro:660FuF7rM",
  vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
  vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
  vmovntdq_2 = "xroy:660FuE7Rm",
  vmovntdqa_2 = "rxoy:660F38u2ArM",
  vmpsadbw_4 = "rrmioy:660F3AV42rMU",
  vpabsb_2 = "rmoy:660F38u1CrM",
  vpabsd_2 = "rmoy:660F38u1ErM",
  vpabsw_2 = "rmoy:660F38u1DrM",
  vpackusdw_3 = "rrmoy:660F38V2BrM",
  vpalignr_4 = "rrmioy:660F3AV0FrMU",
  vpblendvb_4 = "rrmroy:660F3AV4CrMs",
  vpblendw_4 = "rrmioy:660F3AV0ErMU",
  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
  vpcmpeqq_3 = "rrmoy:660F38V29rM",
  vpcmpestri_3 = "rmio:660F3Au61rMU",
  vpcmpestrm_3 = "rmio:660F3Au60rMU",
  vpcmpgtq_3 = "rrmoy:660F38V37rM",
  vpcmpistri_3 = "rmio:660F3Au63rMU",
  vpcmpistrm_3 = "rmio:660F3Au62rMU",
  vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
  vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
  vpextrd_3 = "mri/do:660F3Au16RmU",
  vpextrq_3 = "mri/qo:660F3Au16RmU",
  vphaddw_3 = "rrmoy:660F38V01rM",
  vphaddd_3 = "rrmoy:660F38V02rM",
  vphaddsw_3 = "rrmoy:660F38V03rM",
  vphminposuw_2 = "rmo:660F38u41rM",
  vphsubw_3 = "rrmoy:660F38V05rM",
  vphsubd_3 = "rrmoy:660F38V06rM",
  vphsubsw_3 = "rrmoy:660F38V07rM",
  vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
  vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
  vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
  vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
  vpmaddubsw_3 = "rrmoy:660F38V04rM",
  vpmaxsb_3 = "rrmoy:660F38V3CrM",
  vpmaxsd_3 = "rrmoy:660F38V3DrM",
  vpmaxuw_3 = "rrmoy:660F38V3ErM",
  vpmaxud_3 = "rrmoy:660F38V3FrM",
  vpminsb_3 = "rrmoy:660F38V38rM",
  vpminsd_3 = "rrmoy:660F38V39rM",
  vpminuw_3 = "rrmoy:660F38V3ArM",
  vpminud_3 = "rrmoy:660F38V3BrM",
  vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
  vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
  vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
  vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
  vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
  vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
  vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
  vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
  vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
  vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
  vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
  vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
  vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
  vpmuldq_3 = "rrmoy:660F38V28rM",
  vpmulhrsw_3 = "rrmoy:660F38V0BrM",
  vpmulld_3 = "rrmoy:660F38V40rM",
  vpshufb_3 = "rrmoy:660F38V00rM",
  vpshufd_3 = "rmioy:660Fu70rMU",
  vpshufhw_3 = "rmioy:F30Fu70rMU",
  vpshuflw_3 = "rmioy:F20Fu70rMU",
  vpsignb_3 = "rrmoy:660F38V08rM",
  vpsignw_3 = "rrmoy:660F38V09rM",
  vpsignd_3 = "rrmoy:660F38V0ArM",
  vpslldq_3 = "rrioy:660Fv737mU",
  vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
  vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
  vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
  vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
  vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
  vpsrldq_3 = "rrioy:660Fv733mU",
  vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
  vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
  vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
  vptest_2 = "rmoy:660F38u17rM",

  -- AVX2 integer ops
  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
  vextracti128_3 = "mri/oy:660F3AuL39RmU",
  vpblendd_4 = "rrmioy:660F3AV02rMU",
  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
  vpermd_3 = "rrmy:660F38V36rM",
  vpermq_3 = "rmiy:660F3AuX00rMU",
  -- *vpgather* (!vsib)
  vperm2i128_4 = "rrmiy:660F3AV46rMU",
  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
  vpsllvd_3 = "rrmoy:660F38V47rM",
  vpsllvq_3 = "rrmoy:660F38VX47rM",
  vpsravd_3 = "rrmoy:660F38V46rM",
  vpsrlvd_3 = "rrmoy:660F38V45rM",
  vpsrlvq_3 = "rrmoy:660F38VX45rM",

  -- Intel ADX
  adcx_2 = "rmqd:660F38F6rM",
  adox_2 = "rmqd:F30F38F6rM",

  -- BMI1
  andn_3 = "rrmqd:0F38VF2rM",
  bextr_3 = "rmrqd:0F38wF7rM",
  blsi_2 = "rmqd:0F38vF33m",
  blsmsk_2 = "rmqd:0F38vF32m",
  blsr_2 = "rmqd:0F38vF31m",
  tzcnt_2 = "rmqdw:F30FBCrM",

  -- BMI2
  bzhi_3 = "rmrqd:0F38wF5rM",
  mulx_3 = "rrmqd:F20F38VF6rM",
  pdep_3 = "rrmqd:F20F38VF5rM",
  pext_3 = "rrmqd:F30F38VF5rM",
  rorx_3 = "rmSqd:F20F3AuF0rMS",
  sarx_3 = "rmrqd:F30F38wF7rM",
  shrx_3 = "rmrqd:F20F38wF7rM",
  shlx_3 = "rmrqd:660F38wF7rM",

  -- FMA3
  vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
  vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
  vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
  vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
  vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
  vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",

  vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
  vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
  vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
  vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
  vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
  vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",

  vfmadd132pd_3 = "rrmoy:660F38VX98rM",
  vfmadd132ps_3 = "rrmoy:660F38V98rM",
  vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
  vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
  vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
  vfmadd213ps_3 = "rrmoy:660F38VA8rM",
  vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
  vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
  vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
  vfmadd231ps_3 = "rrmoy:660F38VB8rM",
  vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
  vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",

  vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
  vfmsub132ps_3 = "rrmoy:660F38V9ArM",
  vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
  vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
  vfmsub213pd_3 = "rrmoy:660F38VXAArM",
  vfmsub213ps_3 = "rrmoy:660F38VAArM",
  vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
  vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
  vfmsub231pd_3 = "rrmoy:660F38VXBArM",
  vfmsub231ps_3 = "rrmoy:660F38VBArM",
  vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
  vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",

  vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
  vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
  vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
  vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
  vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
  vfnmadd213ps_3 = "rrmoy:660F38VACrM",
  vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
  vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
  vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
  vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
  vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
  vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",

  vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
  vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
  vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
  vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
  vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
  vfnmsub213ps_3 = "rrmoy:660F38VAErM",
  vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
  vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
  vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
  vfnmsub231ps_3 = "rrmoy:660F38VBErM",
  vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
  vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
}

------------------------------------------------------------------------------

-- Arithmetic ops.
-- n is the operation index: it is substituted as the ModRM spare digit of
-- the 81/83 group forms and, shifted left by 3, added to the base opcode
-- of the register/memory and accumulator forms.
for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
                     ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
  local n8 = shl(n, 3)
  map_op[name.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
    1+n8, 3+n8, n, n, 5+n8, n)
end

-- Shift ops.
-- Each mnemonic maps to the digit that fills the spare ModRM bits of the
-- D1 (by 1), D3 (by cl) and C1 (by immediate) opcode groups.
-- shl and sal share digit 4.
for mnemonic, ext in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
                            shl = 4, shr = 5, sar = 7, sal = 4 } do
  local template = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", ext, ext, ext)
  map_op[mnemonic.."_2"] = template
end

-- Conditional ops.
-- One jcc/setcc/cmovcc template per condition code listed in map_cc.
for cond, code in pairs(map_cc) do
  local jcc = "j"..cond.."_1"
  local setcc = "set"..cond.."_1"
  local cmovcc = "cmov"..cond.."_2"
  map_op[jcc] = format("J.:n0F8%XJ", code) -- short: 7%X
  map_op[setcc] = format("mb:n0F9%X2m", code)
  map_op[cmovcc] = format("rmqdw:0F4%XrM", code) -- P6+
end

-- FP arithmetic ops.
for opname, idx in pairs{ add = 0, mul = 1, com = 2, comp = 3,
                          sub = 4, subr = 5, div = 6, divr = 7 } do
  -- Second opcode byte of the register form.
  local direct = 0xc0 + shl(idx, 3)
  -- Second opcode byte used by the DC/DE forms: for sub/subr/div/divr the
  -- even and odd slots are swapped, for the others it equals `direct`.
  local swapped = direct
  if idx >= 4 then
    if idx % 2 == 0 then
      swapped = direct + 8
    else
      swapped = direct - 8
    end
  end
  local base = "f"..opname
  map_op[base.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", direct, idx, idx)
  if idx == 2 or idx == 3 then
    -- com/comp: no reversed two-operand form and no popping variants here.
    map_op[base.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", direct, idx, idx)
  else
    map_op[base.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", direct, swapped, idx, idx)
    map_op[base.."p_1"] = format("ff:DE%02Xr", swapped)
    map_op[base.."p_2"] = format("fFf:DE%02Xr", swapped)
  end
  map_op["fi"..opname.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", idx, idx)
end

-- FP conditional moves.
for cond, idx in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  -- Low two bits of idx select the slot within the opcode row; bit 2
  -- switches the first opcode byte from DA to DB (adds 0x100).
  local slot = shl(band(idx, 3), 3)
  local page = shl(band(idx, 4), 6)
  local opc = 0xdac0 + slot + page
  map_op["fcmov"..cond.."_1"] = format("ff:%04Xr", opc) -- P6+
  map_op["fcmov"..cond.."_2"] = format("Fff:%04XR", opc) -- P6+
end

-- SSE / AVX FP arithmetic ops.
-- n is the low hex digit of the 0F 5x opcode. sqrt (n == 1) only gets the
-- two-operand SSE templates here; its AVX forms are not generated by this
-- loop.
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
                     sub = 12, min = 13, div = 14, max = 15 } do
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
  if n ~= 1 then
    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
  end
end

-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- n is the final opcode byte; every entry yields a two-operand SSE2
-- template and a three-operand VEX template.
for name,n in pairs{
  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
} do
  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
end

------------------------------------------------------------------------------

-- Pattern characters that select VEX encoding. The value is the index of
-- the operand that supplies VEX.vvvv (that operand is removed from the
-- argument list), or false if vvvv is unused.
-- "w" (vvvv from the 3rd operand) is part of the documented pattern
-- syntax and is used by the bextr/bzhi/sarx/shrx/shlx templates; without
-- an entry here those templates ended in the "bad char" error below.
local map_vexarg = { u = false, v = 1, V = 2, w = 3 }

-- Process pattern string.
--   pat      pattern part of a template (the text after the ":").
--   args     parsed operands; a VEX character with an operand index
--            removes that operand from this table.
--   sz       operand size character passed on to wputop()/wputszarg().
--   op       opcode name, only used in error messages.
--   needrex  if true, 16 is added to the REX value handed to wputop().
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin, vex
  local opcode = 0
  local szov = sz
  local narg = 1
  local rex = 0

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 6 positions.
  if secpos+6 > maxsecpos then wflush() end

  -- Process each character. The appended "|" forces a final opcode flush.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then -- Hex digit.
      -- Convert "0"-"9", "A"-"F" or "a"-"f" to its numeric value.
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then -- Force REX.W.
      rex = 8
    elseif c == "L" then -- Force VEX.L.
      vex.l = true
    elseif c == "r" then -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - band(s, 7) -- Undo regno opcode merge.
      else
        s = band(opcode, 15) -- Undo last digit.
        opcode = shr(opcode, 4)
      end
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      -- Accumulate REX bits: +8 = W, +1 = B (base/rm), +2 = X (index),
      -- +4 = R (spare), +16 = REX required by a byte register.
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
      opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
      addin = nil
    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
      -- Peel the escape bytes (0F, 0F38 or 0F3A) off the accumulated
      -- opcode; m is the resulting opcode map number.
      local b = band(opcode, 255); opcode = shr(opcode, 8)
      local m = 1
      if b == 0x38 then m = 2
      elseif b == 0x3a then m = 3 end
      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
      if b ~= 0x0f then
        werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
          "' in pattern `"..pat.."' for `"..op.."'")
      end
      local v = map_vexarg[c]
      if v then v = remove(args, v) end
      -- A preceding 66/F3/F2 prefix byte becomes the VEX.pp field.
      b = band(opcode, 255)
      local p = 0
      if b == 0x66 then p = 1
      elseif b == 0xf3 then p = 2
      elseif b == 0xf2 then p = 3 end
      if p ~= 0 then opcode = shr(opcode, 8) end
      -- Anything still left in front of the prefix is emitted as-is.
      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
      vex = { m = m, p = p, v = v }
    else
      if opcode then -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
          wvreg("opcode", addin.vreg, psz, sk)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex, vex)
        end
        opcode = nil
      end
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        if mode == "iJ" and not match("iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        elseif c == "s" then
          -- Register number goes into the high 4 bits of an immediate byte.
          local reg = a.reg
          if reg < 0 then
            wputb(0)
            wvreg("imm.hi", a.vreg)
          else
            wputb(shl(reg, 4))
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end

------------------------------------------------------------------------------

-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
-- Returns "" for zero-operand templates, otherwise a list with one
-- "name, name, ..." string per template alternative. Alternatives that
-- contain a suppressed mode ('#') are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    local s = map_modename[sub(tm, 1, 1)]
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end

-- Match operand modes against mode match part of template.
-- Returns true if the mode string of every operand contains the
-- corresponding template character, nothing otherwise.
local function matchtm(tm, args)
  for i=1,#args do
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end

-- Handle opcodes defined with template strings.
map_op[".template__"] = function(params, template, nparams)
  -- Help mode (no params): list the operand modes accepted by the template.
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Without operands there is no match part: the template is the pattern.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Parse every operand. Track the common operand size (szmix is set when
  -- two sized operands disagree) and the needrex flag of the operands.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    local a = parseoperand(p)
    args[i] = a
    local nsz = a.opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    local nrex = a.needrex
    if nrex ~= nil then
      if needrex == nil then
        needrex = nrex
      elseif needrex ~= nrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Walk the '|'-separated match:pattern alternatives in order.
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- The size match starts right after the mode chars (one per operand).
    -- An empty pattern reuses the pattern of the previous alternative.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then
        -- Exact match: each size char after '/' must equal the size of the
        -- corresponding leading operand. Reaching i == 1 means all matched.
        for i = #szm,1,-1 do
          if i == 1 then
            dopattern(pat, args, sz, params.op, needrex)
            return
          elseif args[i-1].opsize ~= sub(szm, i, i) then
            break
          end
        end
      else
        -- Common-size match: the effective size must occur in the size list.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        -- Prefix '1'/'2' selects the size of that operand and ignores mixing.
        if prefix == "1" then
          szp = args[1].opsize
          szmix = nil
        elseif prefix == "2" then
          szp = args[2].opsize
          szmix = nil
        end
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex)
          return
        end
      end
      gotmatch = true
    end
  end

  -- Nothing was emitted: pick the most specific diagnostic.
  local msg
  if not gotmatch then
    msg = "bad operand mode"
  elseif szmix then
    msg = "mixed operand size"
  elseif sz then
    msg = "bad operand size"
  else
    msg = "missing operand size"
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end

------------------------------------------------------------------------------

-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, sz, rex, vreg
    -- op64 ends up holding the 64 bit expression text (address or immediate).
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      -- Form "[disp], reg".
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        -- Form "reg, [disp]".
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        -- Form "reg, imm": needs a 64 bit register operand.
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if a.reg == -1 then
          -- Variable register: the number is merged in later via wvreg.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        rex = a.reg > 7 and 9 or 8
      end
    end
    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)
    -- Emit the 64 bit value as two 32 bit halves, low half first.
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
local function op_data(params)
  if not params then return "imm..." end
  -- The element size is the second char of the op name; 'a' (.aword)
  -- maps to the address size.
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _,p in ipairs(params) do
    local a = parseoperand(p)
    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
      werror("bad mode or size in `"..p.."'")
    end
    if a.mode == "iJ" then
      wputlabel("IMM_", a.imm, 1)
    else
      wputszarg(sz, a.imm)
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end

for _,opname in ipairs{ ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*" } do
  map_op[opname] = op_data
end

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, cvar) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, enumprefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, cvar) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, cvar) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local lbl = parseoperand(params[1])
  local mode, imm = lbl.mode, lbl.imm
  local numeric = type(imm) == "number"
  if numeric and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif mode == "iJ" then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- Optional address: SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if addr then
    local target = parseoperand(addr)
    if target.mode ~= "iPJ" then
      werror("bad label assignment")
    else
      waction("SETLABEL", target.imm)
    end
  end
end
map_op[".label_1"] = map_op[".label_2"]

------------------------------------------------------------------------------

-- Alignment pseudo-opcode.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  -- Accept a plain number or an operand size name.
  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  if align then
    -- Halve up to 8 times: reaching exactly 1 means align is a power of
    -- two in the range 2 ... 256.
    local half = align
    for _ = 1,8 do
      half = half / 2
      if half == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

-- Spacing pseudo-opcode.
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  -- The optional filler must be a number in the range 0 ... 255.
  local fill = params[2]
  if fill then
    fill = tonumber(fill)
    if not fill or fill < 0 or fill > 255 then werror("bad filler") end
  end
  wputxb(fill or 0)
end
map_op[".space_1"] = map_op[".space_2"]

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  -- Validate the name, reject redefinitions and unusable base registers.
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Register the type under the next type number and emit its DtN define.
  local num = ctypenum + 1
  local tp = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  map_type[name] = tp
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions, sorted by name.
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for _,name in ipairs(names) do
    local tp = map_type[name]
    local regname = tp.reg and map_reg_rev[tp.reg] or ""
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, regname))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
function _M.section(num)
  waction("SECTION")
  wputxb(num)
  wflush(true) -- SECTION is a terminal action.
end

------------------------------------------------------------------------------

-- Dump architecture description.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end

-- Dump all user defined elements.
+function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/lib/LuaJIT/dynasm/dynasm.lua b/lib/LuaJIT/dynasm/dynasm.lua new file mode 100644 index 0000000..5ec21a7 --- /dev/null +++ b/lib/LuaJIT/dynasm/dynasm.lua @@ -0,0 +1,1094 @@ +------------------------------------------------------------------------------ +-- DynASM. A dynamic assembler for code generation engines. +-- Originally designed and implemented for LuaJIT. +-- +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See below for full copyright notice. +------------------------------------------------------------------------------ + +-- Application information. +local _info = { + name = "DynASM", + description = "A dynamic assembler for code generation engines", + version = "1.4.0", + vernum = 10400, + release = "2015-10-18", + author = "Mike Pall", + url = "http://luajit.org/dynasm.html", + license = "MIT", + copyright = [[ +Copyright (C) 2005-2017 Mike Pall. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +[ MIT license: http://www.opensource.org/licenses/mit-license.php ] +]], +} + +-- Cache library functions. +local type, pairs, ipairs = type, pairs, ipairs +local pcall, error, assert = pcall, error, assert +local _s = string +local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub +local format, rep, upper = _s.format, _s.rep, _s.upper +local _t = table +local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort +local exit = os.exit +local io = io +local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr + +------------------------------------------------------------------------------ + +-- Program options. +local g_opt = {} + +-- Global state for current file. +local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch +local g_errcount = 0 + +-- Write buffer for output file. 
local g_wbuffer, g_capbuffer

------------------------------------------------------------------------------

-- Append an output line (or a deferred callback function) to the active
-- buffer: the capture buffer if one is set, else the main write buffer.
local function wline(line, needindent)
  local target = g_capbuffer or g_wbuffer
  if needindent then line = g_indent..line end
  target[#target+1] = line
  g_synclineno = g_synclineno + 1
end

-- Write assembler line as a comment, if requested.
local function wcomment(aline)
  local copen = g_opt.comment
  if copen then
    wline(copen..aline..g_opt.endcomment, true)
  end
end

-- Resync CPP line numbers.
local function wsync()
  if g_opt.cpp and g_synclineno ~= g_lineno then
    wline("#line "..g_lineno..' "'..g_fname..'"')
    g_synclineno = g_lineno
  end
end

-- Dummy action flush function. Replaced with arch-specific function later.
local function wflush(term)
end

-- Dump all buffered output lines.
local function wdumplines(out, buf)
  for _,entry in ipairs(buf) do
    if type(entry) ~= "string" then
      -- Special callback to dynamically insert lines after end of processing.
      entry(out)
    else
      assert(out:write(entry, "\n"))
    end
  end
end

------------------------------------------------------------------------------

-- Emit an error. Processing continues with next statement.
local function werror(msg)
  local text = format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline)
  error(text, 0)
end

-- Emit a fatal error. Processing stops.
local function wfatal(msg)
  g_errcount = "fatal"
  werror(msg)
end

-- Print a warning. Processing continues.
local function wwarn(msg)
  stderr:write(format("%s:%s: warning: %s:\n%s\n",
    g_fname, g_lineno, msg, g_curline))
end

-- Print caught error message. But suppress excessive errors.
-- Returns true only after a fatal error, telling the caller to stop.
local function wprinterr(...)
  if type(g_errcount) ~= "number" then
    -- Fatal error.
    stderr:write(...)
    return true -- Stop processing.
  end
  -- Regular error: print the first 20, then a single suppression notice.
  g_errcount = g_errcount + 1
  if g_errcount < 21 then -- Seems to be a reasonable limit.
    stderr:write(...)
  elseif g_errcount == 21 then
    stderr:write(g_fname,
      ":*: warning: too many errors (suppressed further messages).\n")
  end
end

------------------------------------------------------------------------------

-- Map holding all option handlers.
local opt_map = {}
local opt_current

-- Print error and exit with error status.
local function opterror(...)
  stderr:write("dynasm.lua: ERROR: ", ...)
  stderr:write("\n")
  exit(1)
end

-- Get option parameter and advance the argument cursor.
local function optparam(args)
  local argn = args.argn
  local value = args[argn]
  if not value then
    opterror("missing parameter for option `", opt_current, "'.")
  end
  args.argn = argn + 1
  return value
end

------------------------------------------------------------------------------

-- Core pseudo-opcodes.
local map_coreop = {}
-- Dummy opcode map. Replaced by arch-specific map.
local map_op = {}

-- Forward declarations.
local dostmt
local readfile

------------------------------------------------------------------------------

-- Map for defines (initially empty, chains to arch-specific map).
local map_def = {}

-- Pseudo-opcode to define a substitution. The substitution defaults to "1".
map_coreop[".define_2"] = function(params, nparams)
  if not params then return nparams == 1 and "name" or "name, subst" end
  local name = params[1]
  local def = params[2] or "1"
  if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
  map_def[name] = def
end
map_coreop[".define_1"] = map_coreop[".define_2"]

-- Define a substitution on the command line (NAME=subst or plain NAME).
function opt_map.D(args)
  local namesubst = optparam(args)
  local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$")
  if name then
    map_def[name] = subst
    return
  end
  if match(namesubst, "^[%a_][%w_]*$") then
    map_def[namesubst] = "1"
  else
    opterror("bad define")
  end
end

-- Undefine a substitution on the command line.
function opt_map.U(args)
  local name = optparam(args)
  if not match(name, "^[%a_][%w_]*$") then
    opterror("bad define")
  else
    map_def[name] = nil
  end
end

-- Helper for definesubst: records the last word that was substituted.
local gotsubst

local function definesubst_one(word)
  local subst = map_def[word]
  if not subst then return word end
  gotsubst = word
  return subst
end

-- Iteratively substitute defines.
local function definesubst(stmt)
  -- Repeat until a pass substitutes nothing, but at most 100 passes.
  for _ = 1,100 do
    gotsubst = false
    stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
    if not gotsubst then break end
  end
  -- Still substituting after the last pass: treat it as a recursive define.
  if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
  return stmt
end

-- Dump all defines, sorted by name.
local function dumpdefines(out, lvl)
  local names = {}
  for name in pairs(map_def) do
    names[#names+1] = name
  end
  sort(names)
  out:write("Defines:\n")
  for _,name in ipairs(names) do
    local subst = map_def[name]
    if g_arch then subst = g_arch.revdef(subst) end
    out:write(format(" %-20s %s\n", name, subst))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Support variables for conditional assembly.
local condlevel = 0
local condstack = {}

-- Evaluate condition with a Lua expression. Substitutions already performed.
-- The result 0 counts as false; any other value follows Lua truthiness.
local function cond_eval(cond)
  local func, err
  if setfenv then
    func, err = loadstring("return "..cond, "=expr")
    -- No globals. All unknown identifiers evaluate to nil.
    if func then setfenv(func, {}) end
  else
    -- No globals. All unknown identifiers evaluate to nil.
    func, err = load("return "..cond, "=expr", "t", {})
  end
  if func then
    local ok, res = pcall(func)
    if ok then
      return not (res == 0 or not res) -- Oh well.
    end
    err = res
  end
  wfatal("bad condition: "..err)
end

-- Skip statements until next conditional pseudo-opcode at the same level.
local function stmtskip()
  local saved = dostmt
  local depth = 0 -- Nesting depth of .if blocks opened while skipping.
  -- Temporarily replace the statement handler with a filter.
  dostmt = function(stmt)
    local op = match(stmt, "^%s*(%S+)")
    if op == ".if" then
      depth = depth + 1
    elseif depth ~= 0 then
      if op == ".endif" then depth = depth - 1 end
    elseif op == ".elif" or op == ".else" or op == ".endif" then
      -- Conditional at our own level: restore the handler and process it.
      dostmt = saved
      dostmt(stmt)
    end
  end
end

-- Pseudo-opcodes for conditional assembly.
map_coreop[".if_1"] = function(params)
  if not params then return "condition" end
  local lvl = condlevel + 1
  -- Evaluate first: a bad condition raises before the level is entered.
  local res = cond_eval(params[1])
  condlevel = lvl
  condstack[lvl] = res
  if not res then stmtskip() end
end

map_coreop[".elif_1"] = function(params)
  if not params then return "condition" end
  if condlevel == 0 then wfatal(".elif without .if") end
  local lvl = condlevel
  local state = condstack[lvl]
  if state then
    -- An earlier branch was already taken (or .else was seen).
    if state == "else" then wfatal(".elif after .else") end
  else
    state = cond_eval(params[1])
    if state then
      condstack[lvl] = state
      return
    end
  end
  stmtskip()
end

map_coreop[".else_0"] = function(params)
  if condlevel == 0 then wfatal(".else without .if") end
  local lvl = condlevel
  local state = condstack[lvl]
  condstack[lvl] = "else"
  if state then
    if state == "else" then wfatal(".else after .else") end
    -- A branch was already taken: skip the .else body.
    stmtskip()
  end
end

map_coreop[".endif_0"] = function(params)
  local lvl = condlevel
  if lvl == 0 then wfatal(".endif without .if") end
  condlevel = lvl - 1
end

-- Check for unfinished conditionals.
local function checkconds()
  if condlevel ~= 0 and g_errcount ~= "fatal" then
    wprinterr(g_fname, ":*: error: unbalanced conditional\n")
  end
end

------------------------------------------------------------------------------

-- Search for a file in the given path and open it for reading.
local function pathopen(path, name)
  -- Use a backslash separator if package.path contains one.
  local dirsep = package and match(package.path, "\\") and "\\" or "/"
  for _,p in ipairs(path) do
    local fullname = p == "" and name or p..dirsep..name
    local fin = io.open(fullname, "r")
    if fin then
      g_fname = fullname
      return fin
    end
  end
end

-- Include a file.
map_coreop[".include_1"] = function(params)
  if not params then return "filename" end
  local name = params[1]
  -- Save state. Ugly, I know. but upvalues are fast.
  local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
  -- Read the included file.
  local fatal = readfile(pathopen(g_opt.include, name) or
			 wfatal("include file `"..name.."' not found"))
  -- Restore state.
  g_synclineno = -1
  g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
  if fatal then wfatal("in include file") end
end

-- Make .include and conditionals initially available, too.
map_op[".include_1"] = map_coreop[".include_1"]
map_op[".if_1"] = map_coreop[".if_1"]
map_op[".elif_1"] = map_coreop[".elif_1"]
map_op[".else_0"] = map_coreop[".else_0"]
map_op[".endif_0"] = map_coreop[".endif_0"]

------------------------------------------------------------------------------

-- Support variables for macros.
local mac_capture, mac_lineno, mac_name
local mac_active = {}
local mac_list = {}

-- Pseudo-opcode to define a macro.
-- Installs a statement capture function that collects the macro body until
-- a line consisting only of ".endmacro", then registers the macro as an
-- opcode handler named "<name>_<number of parameters>".
map_coreop[".macro_*"] = function(mparams)
  if not mparams then return "name [, params...]" end
  -- Split off and validate macro name.
  local name = remove(mparams, 1)
  if not name then werror("missing macro name") end
  if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]*$")) then
    wfatal("bad macro name `"..name.."'")
  end
  -- Validate macro parameter names.
  local mdup = {}
  for _,mp in ipairs(mparams) do
    if not match(mp, "^[%a_][%w_]*$") then
      wfatal("bad macro parameter name `"..mp.."'")
    end
    if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
    mdup[mp] = true
  end
  -- Check for duplicate or recursive macro definitions.
  local opname = name.."_"..#mparams
  if map_op[opname] or map_op[name.."_*"] then
    wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
  end
  if mac_capture then wfatal("recursive macro definition") end

  -- Enable statement capture.
  local lines = {}
  mac_lineno = g_lineno
  mac_name = name
  mac_capture = function(stmt) -- Statement capture function.
    -- Stop macro definition with .endmacro pseudo-opcode. The dot is
    -- escaped so that only a literal ".endmacro" terminates the body.
    if not match(stmt, "^%s*%.endmacro%s*$") then
      lines[#lines+1] = stmt
      return
    end
    mac_capture = nil
    mac_lineno = nil
    mac_name = nil
    mac_list[#mac_list+1] = opname
    -- Add macro-op definition.
    map_op[opname] = function(params)
      -- Without params: return the definition (used by dumpmacros).
      if not params then return mparams, lines end
      -- Protect against recursive macro invocation.
      if mac_active[opname] then wfatal("recursive macro invocation") end
      mac_active[opname] = true
      -- Setup substitution map.
      local subst = {}
      for i,mp in ipairs(mparams) do subst[mp] = params[i] end
      local mcom
      if g_opt.maccomment and g_opt.comment then
	mcom = " MACRO "..name.." ("..#mparams..")"
	wcomment("{"..mcom)
      end
      -- Loop through all captured statements
      for _,stmt in ipairs(lines) do
	-- Substitute macro parameters.
	local st = gsub(stmt, "[%w_]+", subst)
	st = definesubst(st)
	st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
	if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
	-- Emit statement. Use a protected call for better diagnostics.
	local ok, err = pcall(dostmt, st)
	if not ok then
	  -- Add the captured statement to the error.
	  wprinterr(err, "\n", g_indent, "| ", stmt,
		    "\t[MACRO ", name, " (", #mparams, ")]\n")
	end
      end
      if mcom then wcomment("}"..mcom) end
      mac_active[opname] = nil
    end
  end
end

-- An .endmacro pseudo-opcode outside of a macro definition is an error.
map_coreop[".endmacro_0"] = function(params)
  wfatal(".endmacro without .macro")
end

-- Dump all macros and their contents (with -PP only).
local function dumpmacros(out, lvl)
  sort(mac_list)
  out:write("Macros:\n")
  for _,opname in ipairs(mac_list) do
    -- Strip the "_<count>" suffix. The count may have more than one digit,
    -- so match it instead of cutting a fixed number of characters.
    local name = match(opname, "^(.*)_%d+$")
    local params, lines = map_op[opname]()
    out:write(format(" %-20s %s\n", name, concat(params, ", ")))
    if lvl > 1 then
      for _,line in ipairs(lines) do
	out:write(" |", line, "\n")
      end
      out:write("\n")
    end
  end
  out:write("\n")
end

-- Check for unfinished macro definitions.
local function checkmacros()
  if mac_capture then
    wprinterr(g_fname, ":", mac_lineno,
	      ": error: unfinished .macro `", mac_name ,"'\n")
  end
end

------------------------------------------------------------------------------

-- Support variables for captures.
local cap_lineno, cap_name
local cap_buffers = {}
local cap_used = {}

-- Start a capture.
map_coreop[".capture_1"] = function(params)
  if not params then return "name" end
  wflush()
  local name = params[1]
  if not match(name, "^[%a_][%w_]*$") then
    wfatal("bad capture name `"..name.."'")
  end
  if cap_name then
    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
  end
  cap_name = name
  cap_lineno = g_lineno
  -- Create or continue a capture buffer and start the output line capture.
  local buf = cap_buffers[name]
  if not buf then buf = {}; cap_buffers[name] = buf end
  g_capbuffer = buf
  g_synclineno = 0
end

-- Stop a capture.
map_coreop[".endcapture_0"] = function(params)
  wflush()
  if not cap_name then wfatal(".endcapture without a valid .capture") end
  cap_name = nil
  cap_lineno = nil
  g_capbuffer = nil
  g_synclineno = 0
end

-- Dump a capture buffer.
-- The buffer is looked up when the output is written, so statements
-- captured after this point are included, too.
map_coreop[".dumpcapture_1"] = function(params)
  if not params then return "name" end
  wflush()
  local name = params[1]
  if not match(name, "^[%a_][%w_]*$") then
    wfatal("bad capture name `"..name.."'")
  end
  cap_used[name] = true
  wline(function(out)
    local buf = cap_buffers[name]
    if buf then wdumplines(out, buf) end
  end)
  g_synclineno = 0
end

-- Dump all captures and their buffers (with -PP only).
local function dumpcaptures(out, lvl)
  out:write("Captures:\n")
  for name,buf in pairs(cap_buffers) do
    out:write(format(" %-20s %4s)\n", name, "("..#buf))
    if lvl > 1 then
      local bar = rep("=", 76)
      out:write(" ", bar, "\n")
      for _,line in ipairs(buf) do
	if type(line) == "string" then
	  out:write(" ", line, "\n")
	else
	  -- Buffers may hold deferred-output callbacks queued via wline().
	  -- These cannot be passed to write(); show a marker instead.
	  out:write(" ", "<deferred output>", "\n")
	end
      end
      out:write(" ", bar, "\n\n")
    end
  end
  out:write("\n")
end

-- Check for unfinished or unused captures.
local function checkcaptures()
  if cap_name then
    wprinterr(g_fname, ":", cap_lineno,
	      ": error: unfinished .capture `", cap_name,"'\n")
    return
  end
  for name in pairs(cap_buffers) do
    if not cap_used[name] then
      wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
    end
  end
end

------------------------------------------------------------------------------

-- Sections names.
local map_sections = {}

-- Pseudo-opcode to define code sections.
-- Emits one DASM_SECTION_<NAME> define per section plus DASM_MAXSECTION and
-- registers a ".<name>" pseudo-opcode which switches to that section.
-- TODO: Data sections, BSS sections. Needs extra C code and API.
map_coreop[".section_*"] = function(params)
  if not params then return "name..." end
  if #map_sections > 0 then werror("duplicate section definition") end
  wflush()
  for sn,name in ipairs(params) do
    local opname = "."..name.."_0"
    -- The section name must not collide with an existing pseudo-opcode.
    if not match(name, "^[%a][%w_]*$") or
       map_op[opname] or map_op["."..name.."_*"] then
      werror("bad section name `"..name.."'")
    end
    map_sections[#map_sections+1] = name
    wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1))
    map_op[opname] = function(params) g_arch.section(sn-1) end
  end
  wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
end

-- Dump all sections.
local function dumpsections(out, lvl)
  out:write("Sections:\n")
  for _,name in ipairs(map_sections) do
    out:write(format(" %s\n", name))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Replacement for customized Lua, which lacks the package library.
local prefix = ""
if not require then
  function require(name)
    local fp = assert(io.open(prefix..name..".lua"))
    local s = fp:read("*a")
    assert(fp:close())
    return assert(loadstring(s, "@"..name..".lua"))()
  end
end

-- Load architecture-specific module.
-- Returns an error message on failure and nothing on success.
local function loadarch(arch)
  if not match(arch, "^[%w_]+$") then return "bad arch name" end
  local ok, m_arch = pcall(require, "dasm_"..arch)
  if not ok then return "cannot load module: "..m_arch end
  g_arch = m_arch
  wflush = m_arch.passcb(wline, werror, wfatal, wwarn)
  m_arch.setup(arch, g_opt)
  map_op, map_def = m_arch.mergemaps(map_coreop, map_def)
end

-- Dump architecture description.
function opt_map.dumparch(args)
  local archname = optparam(args)
  if not g_arch then
    local err = loadarch(archname)
    if err then opterror(err) end
  end

  -- Collect and sort the names of all core and arch-specific opcodes.
  local opnames = {}
  for opname in pairs(map_coreop) do opnames[#opnames+1] = opname end
  for opname in pairs(map_op) do opnames[#opnames+1] = opname end
  sort(opnames)

  local out = stdout
  out:write(format("%s version %s, released %s, %s\n",
    _info.name, _info.version, _info.release, _info.url))
  g_arch.dumparch(out)

  -- Names starting with '.' are listed first as pseudo-opcodes; the first
  -- name without a leading dot starts the regular opcode list.
  local pseudo = true
  out:write("Pseudo-Opcodes:\n")
  for _,sname in ipairs(opnames) do
    local name, nparam = match(sname, "^(.+)_([0-9%*])$")
    if name then
      if pseudo and sub(name, 1, 1) ~= "." then
	out:write("\nOpcodes:\n")
	pseudo = false
      end
      local handler = map_op[sname]
      if nparam ~= "*" then nparam = tonumber(nparam) end
      -- Ask the handler (or the template helper) for its parameter help.
      local help
      if nparam == 0 then
	help = ""
      elseif type(handler) == "string" then
	help = map_op[".template__"](nil, handler, nparam)
      else
	help = handler(nil, nparam)
      end
      if type(help) ~= "table" then
	out:write(format(" %-12s %s\n", name, help))
      else
	for _,variant in ipairs(help) do
	  out:write(format(" %-12s %s\n", name, variant))
	end
      end
    end
  end
  out:write("\n")
  exit(0)
end

-- Pseudo-opcode to set the architecture.
-- Only initially available (map_op is replaced when called).
map_op[".arch_1"] = function(params)
  if not params then return "name" end
  local err = loadarch(params[1])
  if err then wfatal(err) end
  -- Emit a compile-time check against the DynASM version number.
  wline(format("#if DASM_VERSION != %d", _info.vernum))
  wline('#error "Version mismatch between DynASM and included encoding engine"')
  wline("#endif")
end

-- Dummy .arch pseudo-opcode to improve the error report.
map_coreop[".arch_1"] = function(params)
  if not params then return "name" end
  wfatal("duplicate .arch statement")
end

------------------------------------------------------------------------------

-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'.
map_coreop[".nop_*"] = function(params)
  if not params then return "[ignored...]" end
end

-- Pseudo-opcodes to raise errors.
map_coreop[".error_1"] = function(params)
  if not params then return "message" end
  werror(params[1])
end

map_coreop[".fatal_1"] = function(params)
  if not params then return "message" end
  wfatal(params[1])
end

-- Dump all user defined elements. Does nothing at dump level 0.
local function dumpdef(out)
  local lvl = g_opt.dumpdef
  if lvl == 0 then return end
  dumpsections(out, lvl)
  dumpdefines(out, lvl)
  if g_arch then g_arch.dumpdef(out, lvl) end
  dumpmacros(out, lvl)
  dumpcaptures(out, lvl)
end

------------------------------------------------------------------------------

-- Helper for splitstmt: stack of expected closing brackets, innermost first.
local splitlvl

-- Closing bracket for each opening bracket.
local split_closer = { ["("] = ")", ["["] = "]", ["{"] = "}" }

local function splitstmt_one(c)
  local closer = split_closer[c]
  if closer then
    -- Opening bracket: push the matching closer.
    splitlvl = closer..splitlvl
  elseif c == ")" or c == "]" or c == "}" then
    -- Closing bracket: must match the innermost open bracket.
    if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end
    splitlvl = sub(splitlvl, 2)
  elseif splitlvl == "" then
    -- Comma outside of any brackets: replace with a split marker.
    return " \0 "
  end
  return c
end

-- Split statement into (pseudo-)opcode and params.
local function splitstmt(stmt)
  -- Convert label with trailing-colon into .label statement.
  local label = match(stmt, "^%s*(.+):%s*$")
  if label then return ".label", {label} end

  -- Split at commas and equal signs, but obey parentheses and brackets.
  splitlvl = ""
  stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one)
  if splitlvl ~= "" then werror("unbalanced () or []") end

  -- Split off opcode.
  local op, rest = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
  if not op then werror("bad statement syntax") end

  -- Split parameters at the markers and strip trailing whitespace.
  local params = {}
  for piece in gmatch(rest, "%s*(%Z+)%z?") do
    local trimmed = gsub(piece, "%s+$", "")
    params[#params+1] = trimmed
  end
  if #params > 16 then werror("too many parameters") end

  params.op = op
  return op, params
end

-- Process a single statement.
+dostmt = function(stmt) + -- Ignore empty statements. + if match(stmt, "^%s*$") then return end + + -- Capture macro defs before substitution. + if mac_capture then return mac_capture(stmt) end + stmt = definesubst(stmt) + + -- Emit C code without parsing the line. + if sub(stmt, 1, 1) == "|" then + local tail = sub(stmt, 2) + wflush() + if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end + return + end + + -- Split into (pseudo-)opcode and params. + local op, params = splitstmt(stmt) + + -- Get opcode handler (matching # of parameters or generic handler). + local f = map_op[op.."_"..#params] or map_op[op.."_*"] + if not f then + if not g_arch then wfatal("first statement must be .arch") end + -- Improve error report. + for i=0,9 do + if map_op[op.."_"..i] then + werror("wrong number of parameters for `"..op.."'") + end + end + werror("unknown statement `"..op.."'") + end + + -- Call opcode handler or special handler for template strings. + if type(f) == "string" then + map_op[".template__"](params, f) + else + f(params) + end +end + +-- Process a single line. +local function doline(line) + if g_opt.flushline then wflush() end + + -- Assembler line? + local indent, aline = match(line, "^(%s*)%|(.*)$") + if not aline then + -- No, plain C code line, need to flush first. + wflush() + wsync() + wline(line, false) + return + end + + g_indent = indent -- Remember current line indentation. + + -- Emit C code (even from macros). Avoids echo and line parsing. + if sub(aline, 1, 1) == "|" then + if not mac_capture then + wsync() + elseif g_opt.comment then + wsync() + wcomment(aline) + end + dostmt(aline) + return + end + + -- Echo assembler line as a comment. + if g_opt.comment then + wsync() + wcomment(aline) + end + + -- Strip assembler comments. + aline = gsub(aline, "//.*$", "") + + -- Split line into statements at semicolons. 
+ if match(aline, ";") then + for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end + else + dostmt(aline) + end +end + +------------------------------------------------------------------------------ + +-- Write DynASM header. +local function dasmhead(out) + out:write(format([[ +/* +** This file has been pre-processed with DynASM. +** %s +** DynASM version %s, DynASM %s version %s +** DO NOT EDIT! The original file is in "%s". +*/ + +]], _info.url, + _info.version, g_arch._info.arch, g_arch._info.version, + g_fname)) +end + +-- Read input file. +readfile = function(fin) + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Process all lines. + for line in fin:lines() do + g_lineno = g_lineno + 1 + g_curline = line + local ok, err = pcall(doline, line) + if not ok and wprinterr(err, "\n") then return true end + end + wflush() + + -- Close input file. + assert(fin == stdin or fin:close()) +end + +-- Write output file. +local function writefile(outfile) + local fout + + -- Open output file. + if outfile == nil or outfile == "-" then + fout = stdout + else + fout = assert(io.open(outfile, "w")) + end + + -- Write all buffered lines + wdumplines(fout, g_wbuffer) + + -- Close output file. + assert(fout == stdout or fout:close()) + + -- Optionally dump definitions. + dumpdef(fout == stdout and stderr or stdout) +end + +-- Translate an input file to an output file. +local function translate(infile, outfile) + g_wbuffer = {} + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Put header. + wline(dasmhead) + + -- Read input file. + local fin + if infile == "-" then + g_fname = "(stdin)" + fin = stdin + else + g_fname = infile + fin = assert(io.open(infile, "r")) + end + readfile(fin) + + -- Check for errors. 
+ if not g_arch then + wprinterr(g_fname, ":*: error: missing .arch directive\n") + end + checkconds() + checkmacros() + checkcaptures() + + if g_errcount ~= 0 then + stderr:write(g_fname, ":*: info: ", g_errcount, " error", + (type(g_errcount) == "number" and g_errcount > 1) and "s" or "", + " in input file -- no output file generated.\n") + dumpdef(stderr) + exit(1) + end + + -- Write output file. + writefile(outfile) +end + +------------------------------------------------------------------------------ + +-- Print help text. +function opt_map.help() + stdout:write("DynASM -- ", _info.description, ".\n") + stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n") + stdout:write[[ + +Usage: dynasm [OPTION]... INFILE.dasc|- + + -h, --help Display this help text. + -V, --version Display version and copyright information. + + -o, --outfile FILE Output file name (default is stdout). + -I, --include DIR Add directory to the include search path. + + -c, --ccomment Use /* */ comments for assembler lines. + -C, --cppcomment Use // comments for assembler lines (default). + -N, --nocomment Suppress assembler lines in output. + -M, --maccomment Show macro expansions as comments (default off). + + -L, --nolineno Suppress CPP line number information in output. + -F, --flushline Flush action list for every line. + + -D NAME[=SUBST] Define a substitution. + -U NAME Undefine a substitution. + + -P, --dumpdef Dump defines, macros, etc. Repeat for more output. + -A, --dumparch ARCH Load architecture ARCH and dump description. +]] + exit(0) +end + +-- Print version information. +function opt_map.version() + stdout:write(format("%s version %s, released %s\n%s\n\n%s", + _info.name, _info.version, _info.release, _info.url, _info.copyright)) + exit(0) +end + +-- Misc. options. 
+function opt_map.outfile(args) g_opt.outfile = optparam(args) end +function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end +function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end +function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end +function opt_map.nocomment() g_opt.comment = false end +function opt_map.maccomment() g_opt.maccomment = true end +function opt_map.nolineno() g_opt.cpp = false end +function opt_map.flushline() g_opt.flushline = true end +function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end + +------------------------------------------------------------------------------ + +-- Short aliases for long options. +local opt_alias = { + h = "help", ["?"] = "help", V = "version", + o = "outfile", I = "include", + c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment", + L = "nolineno", F = "flushline", + P = "dumpdef", A = "dumparch", +} + +-- Parse single option. +local function parseopt(opt, args) + opt_current = #opt == 1 and "-"..opt or "--"..opt + local f = opt_map[opt] or opt_map[opt_alias[opt]] + if not f then + opterror("unrecognized option `", opt_current, "'. Try `--help'.\n") + end + f(args) +end + +-- Parse arguments. +local function parseargs(args) + -- Default options. + g_opt.comment = "//|" + g_opt.endcomment = "" + g_opt.cpp = true + g_opt.dumpdef = 0 + g_opt.include = { "" } + + -- Process all option arguments. + args.argn = 1 + repeat + local a = args[args.argn] + if not a then break end + local lopt, opt = match(a, "^%-(%-?)(.+)") + if not opt then break end + args.argn = args.argn + 1 + if lopt == "" then + -- Loop through short options. + for o in gmatch(opt, ".") do parseopt(o, args) end + else + -- Long option. + parseopt(opt, args) + end + until false + + -- Check for proper number of arguments. 
+ local nargs = #args - args.argn + 1 + if nargs ~= 1 then + if nargs == 0 then + if g_opt.dumpdef > 0 then return dumpdef(stdout) end + end + opt_map.help() + end + + -- Translate a single input file to a single output file + -- TODO: Handle multiple files? + translate(args[args.argn], g_opt.outfile) +end + +------------------------------------------------------------------------------ + +-- Add the directory dynasm.lua resides in to the Lua module search path. +local arg = arg +if arg and arg[0] then + prefix = match(arg[0], "^(.*[/\\])") + if package and prefix then package.path = prefix.."?.lua;"..package.path end +end + +-- Start DynASM. +parseargs{...} + +------------------------------------------------------------------------------ + diff --git a/lib/LuaJIT/etc/luajit.1 b/lib/LuaJIT/etc/luajit.1 new file mode 100644 index 0000000..0d263db --- /dev/null +++ b/lib/LuaJIT/etc/luajit.1 @@ -0,0 +1,88 @@ +.TH luajit 1 "" "" "LuaJIT documentation" +.SH NAME +luajit \- Just-In-Time Compiler for the Lua Language +\fB +.SH SYNOPSIS +.B luajit +[\fIoptions\fR]... [\fIscript\fR [\fIargs\fR]...] +.SH "WEB SITE" +.IR http://luajit.org +.SH DESCRIPTION +.PP +This is the command-line program to run Lua programs with \fBLuaJIT\fR. +.PP +\fBLuaJIT\fR is a just-in-time (JIT) compiler for the Lua language. +The virtual machine (VM) is based on a fast interpreter combined with +a trace compiler. It can significantly improve the performance of Lua programs. +.PP +\fBLuaJIT\fR is API\- and ABI-compatible with the VM of the standard +Lua\ 5.1 interpreter. When embedding the VM into an application, +the built library can be used as a drop-in replacement. +.SH OPTIONS +.TP +.BI "\-e " chunk +Run the given chunk of Lua code. +.TP +.BI "\-l " library +Load the named library, just like \fBrequire("\fR\fIlibrary\fR\fB")\fR. +.TP +.BI "\-b " ... +Save or list bytecode. Run without arguments to get help on options. 
+.TP +.BI "\-j " command +Perform LuaJIT control command (optional space after \fB\-j\fR). +.TP +.BI "\-O" [opt] +Control LuaJIT optimizations. +.TP +.B "\-i" +Run in interactive mode. +.TP +.B "\-v" +Show \fBLuaJIT\fR version. +.TP +.B "\-E" +Ignore environment variables. +.TP +.B "\-\-" +Stop processing options. +.TP +.B "\-" +Read script from stdin instead. +.PP +After all options are processed, the given \fIscript\fR is run. +The arguments are passed in the global \fIarg\fR table. +.PP +Interactive mode is only entered, if no \fIscript\fR and no \fB\-e\fR +option is given. Interactive mode can be left with EOF (\fICtrl\-Z\fR). +.SH EXAMPLES +.TP +luajit hello.lua world + +Prints "Hello world", assuming \fIhello.lua\fR contains: +.br + print("Hello", arg[1]) +.TP +luajit \-e "local x=0; for i=1,1e9 do x=x+i end; print(x)" + +Calculates the sum of the numbers from 1 to 1000000000. +.br +And finishes in a reasonable amount of time, too. +.TP +luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end" + +Runs some nested loops and shows the resulting traces. +.SH COPYRIGHT +.PP +\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall. +.br +\fBLuaJIT\fR is open source software, released under the MIT license. +.SH SEE ALSO +.PP +More details in the provided HTML docs or at: +.IR http://luajit.org +.br +More about the Lua language can be found at: +.IR http://lua.org/docs.html +.PP +lua(1) diff --git a/lib/LuaJIT/etc/luajit.pc b/lib/LuaJIT/etc/luajit.pc new file mode 100644 index 0000000..a78f174 --- /dev/null +++ b/lib/LuaJIT/etc/luajit.pc @@ -0,0 +1,25 @@ +# Package information for LuaJIT to be used by pkg-config.
+majver=2 +minver=1 +relver=0 +version=${majver}.${minver}.${relver}-beta3 +abiver=5.1 + +prefix=/usr/local +multilib=lib +exec_prefix=${prefix} +libdir=${exec_prefix}/${multilib} +libname=luajit-${abiver} +includedir=${prefix}/include/luajit-${majver}.${minver} + +INSTALL_LMOD=${prefix}/share/lua/${abiver} +INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver} + +Name: LuaJIT +Description: Just-in-time compiler for Lua +URL: http://luajit.org +Version: ${version} +Requires: +Libs: -L${libdir} -l${libname} +Libs.private: -Wl,-E -lm -ldl +Cflags: -I${includedir} diff --git a/lib/LuaJIT/libluajit.a b/lib/LuaJIT/libluajit.a Binary files differdeleted file mode 100644 index c82e81c..0000000 --- a/lib/LuaJIT/libluajit.a +++ /dev/null diff --git a/lib/LuaJIT/libluajit.so b/lib/LuaJIT/libluajit.so Binary files differdeleted file mode 100755 index 04bd982..0000000 --- a/lib/LuaJIT/libluajit.so +++ /dev/null diff --git a/lib/LuaJIT/src/.gitignore b/lib/LuaJIT/src/.gitignore new file mode 100644 index 0000000..1a30573 --- /dev/null +++ b/lib/LuaJIT/src/.gitignore @@ -0,0 +1,7 @@ +luajit +lj_bcdef.h +lj_ffdef.h +lj_libdef.h +lj_recdef.h +lj_folddef.h +lj_vm.[sS] diff --git a/lib/LuaJIT/src/Makefile b/lib/LuaJIT/src/Makefile new file mode 100644 index 0000000..d22eb73 --- /dev/null +++ b/lib/LuaJIT/src/Makefile @@ -0,0 +1,721 @@ +############################################################################## +# LuaJIT Makefile. Requires GNU Make. +# +# Please read doc/install.html before changing any variables! +# +# Suitable for POSIX platforms (Linux, *BSD, OSX etc.). +# Also works with MinGW and Cygwin on Windows. +# Please check msvcbuild.bat for building with MSVC on Windows. +# +# Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +############################################################################## + +MAJVER= 2 +MINVER= 1 +RELVER= 0 +ABIVER= 5.1 +NODOTABIVER= 51 + +############################################################################## +############################# COMPILER OPTIONS ############################# +############################################################################## +# These options mainly affect the speed of the JIT compiler itself, not the +# speed of the JIT-compiled code. Turn any of the optional settings on by +# removing the '#' in front of them. Make sure you force a full recompile +# with "make clean", followed by "make" if you change any options. +# +DEFAULT_CC = gcc +# +# LuaJIT builds as a native 32 or 64 bit binary by default. +CC= $(DEFAULT_CC) +# +# Use this if you want to force a 32 bit build on a 64 bit multilib OS. +#CC= $(DEFAULT_CC) -m32 +# +# Since the assembler part does NOT maintain a frame pointer, it's pointless +# to slow down the C part by not omitting it. Debugging, tracebacks and +# unwinding are not affected -- the assembler part has frame unwind +# information and GCC emits it where needed (x64) or with -g (see CCDEBUG). +CCOPT= -O2 -fomit-frame-pointer +# Use this if you want to generate a smaller binary (but it's slower): +#CCOPT= -Os -fomit-frame-pointer +# Note: it's no longer recommended to use -O3 with GCC 4.x. +# The I-Cache bloat usually outweighs the benefits from aggressive inlining. 
+# +# Target-specific compiler options: +# +# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute +# the binaries to a different machine you could also use: -march=native +# +CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse +CCOPT_x64= +CCOPT_arm= +CCOPT_arm64= +CCOPT_ppc= +CCOPT_mips= +# +CCDEBUG= +# Uncomment the next line to generate debug information: +#CCDEBUG= -g +# +CCWARN= -Wall +# Uncomment the next line to enable more warnings: +#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith +# +############################################################################## + +############################################################################## +################################ BUILD MODE ################################ +############################################################################## +# The default build mode is mixed mode on POSIX. On Windows this is the same +# as dynamic mode. +# +# Mixed mode creates a static + dynamic library and a statically linked luajit. +BUILDMODE= mixed +# +# Static mode creates a static library and a statically linked luajit. +#BUILDMODE= static +# +# Dynamic mode creates a dynamic library and a dynamically linked luajit. +# Note: this executable will only run when the library is installed! +#BUILDMODE= dynamic +# +############################################################################## + +############################################################################## +################################# FEATURES ################################# +############################################################################## +# Enable/disable these features as needed, but make sure you force a full +# recompile with "make clean", followed by "make". +XCFLAGS= +# +# Permanently disable the FFI extension to reduce the size of the LuaJIT +# executable. But please consider that the FFI library is compiled-in, +# but NOT loaded by default. 
It only allocates any memory, if you actually +# make use of it. +#XCFLAGS+= -DLUAJIT_DISABLE_FFI +# +# Features from Lua 5.2 that are unlikely to break existing code are +# enabled by default. Some other features that *might* break some existing +# code (e.g. __pairs or os.execute() return values) can be enabled here. +# Note: this does not provide full compatibility with Lua 5.2 at this time. +#XCFLAGS+= -DLUAJIT_ENABLE_LUA52COMPAT +# +# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter. +#XCFLAGS+= -DLUAJIT_DISABLE_JIT +# +# Some architectures (e.g. PPC) can use either single-number (1) or +# dual-number (2) mode. Uncomment one of these lines to override the +# default mode. Please see LJ_ARCH_NUMMODE in lj_arch.h for details. +#XCFLAGS+= -DLUAJIT_NUMMODE=1 +#XCFLAGS+= -DLUAJIT_NUMMODE=2 +# +# Enable GC64 mode for x64. +#XCFLAGS+= -DLUAJIT_ENABLE_GC64 +# +############################################################################## + +############################################################################## +############################ DEBUGGING SUPPORT ############################# +############################################################################## +# Enable these options as needed, but make sure you force a full recompile +# with "make clean", followed by "make". +# Note that most of these are NOT suitable for benchmarking or release mode! +# +# Use the system provided memory allocator (realloc) instead of the +# bundled memory allocator. This is slower, but sometimes helpful for +# debugging. This option cannot be enabled on x64 without GC64, since +# realloc usually doesn't return addresses in the right address range. +# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and +# the only way to get useful results from it for all other architectures. +#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC +# +# This define is required to run LuaJIT under Valgrind. The Valgrind +# header files must be installed. 
You should enable debug information, too. +# Use --suppressions=lj.supp to avoid some false positives. +#XCFLAGS+= -DLUAJIT_USE_VALGRIND +# +# This is the client for the GDB JIT API. GDB 7.0 or higher is required +# to make use of it. See lj_gdbjit.c for details. Enabling this causes +# a non-negligible overhead, even when not running under GDB. +#XCFLAGS+= -DLUAJIT_USE_GDBJIT +# +# Turn on assertions for the Lua/C API to debug problems with lua_* calls. +# This is rather slow -- use only while developing C libraries/embeddings. +#XCFLAGS+= -DLUA_USE_APICHECK +# +# Turn on assertions for the whole LuaJIT VM. This significantly slows down +# everything. Use only if you suspect a problem with LuaJIT itself. +#XCFLAGS+= -DLUA_USE_ASSERT +# +############################################################################## +# You probably don't need to change anything below this line! +############################################################################## + +############################################################################## +# Host system detection. +############################################################################## + +ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) + HOST_SYS= Windows + HOST_RM= del +else + HOST_SYS:= $(shell uname -s) + ifneq (,$(findstring MINGW,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif + ifneq (,$(findstring MSYS,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= mingw + endif + ifneq (,$(findstring CYGWIN,$(HOST_SYS))) + HOST_SYS= Windows + HOST_MSYS= cygwin + endif +endif + +############################################################################## +# Flags and options for host and target. 
+############################################################################## + +# You can override the following variables at the make command line: +# CC HOST_CC STATIC_CC DYNAMIC_CC +# CFLAGS HOST_CFLAGS TARGET_CFLAGS +# LDFLAGS HOST_LDFLAGS TARGET_LDFLAGS TARGET_SHLDFLAGS +# LIBS HOST_LIBS TARGET_LIBS +# CROSS HOST_SYS TARGET_SYS TARGET_FLAGS +# +# Cross-compilation examples: +# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows +# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- + +ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) +CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) +LDOPTIONS= $(CCDEBUG) $(LDFLAGS) + +HOST_CC= $(CC) +HOST_RM?= rm -f +# If left blank, minilua is built and used. You can supply an installed +# copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua +HOST_LUA= + +HOST_XCFLAGS= -I. +HOST_XLDFLAGS= +HOST_XLIBS= +HOST_ACFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) $(HOST_CFLAGS) +HOST_ALDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) $(HOST_LDFLAGS) +HOST_ALIBS= $(HOST_XLIBS) $(LIBS) $(HOST_LIBS) + +STATIC_CC = $(CROSS)$(CC) +DYNAMIC_CC = $(CROSS)$(CC) -fPIC +TARGET_CC= $(STATIC_CC) +TARGET_STCC= $(STATIC_CC) +TARGET_DYNCC= $(DYNAMIC_CC) +TARGET_LD= $(CROSS)$(CC) +TARGET_AR= $(CROSS)ar rcus +TARGET_STRIP= $(CROSS)strip + +TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) +TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib +TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) +TARGET_DLLNAME= lua$(NODOTABIVER).dll +TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) +TARGET_DYNXLDOPTS= + +TARGET_LFSFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE +TARGET_XCFLAGS= $(TARGET_LFSFLAGS) -U_FORTIFY_SOURCE +TARGET_XLDFLAGS= +TARGET_XLIBS= -lm +TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) +TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) +TARGET_ASFLAGS= $(ASOPTIONS) 
$(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) +TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) +TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) +TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) + +TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) +ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= x64 +else +ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= x86 +else +ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) + TARGET_LJARCH= arm +else +ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 + endif + TARGET_LJARCH= arm64 +else +ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) + ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE + else + TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE + endif + TARGET_LJARCH= ppc +else +ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) + ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__MIPSEL__=1 + endif + ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= mips64 + else + TARGET_LJARCH= mips + endif +else + $(error Unsupported target architecture) +endif +endif +endif +endif +endif +endif + +ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 + TARGET_ARCH+= -D__CELLOS_LV2__ + TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC + TARGET_XLIBS+= -lpthread +endif + +TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) +TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH)) + +ifneq (,$(PREFIX)) +ifneq (/usr/local,$(PREFIX)) + TARGET_XCFLAGS+= -DLUA_ROOT=\"$(PREFIX)\" + ifneq (/usr,$(PREFIX)) + TARGET_DYNXLDOPTS= -Wl,-rpath,$(TARGET_LIBPATH) + endif +endif +endif +ifneq (,$(MULTILIB)) + TARGET_XCFLAGS+= -DLUA_MULTILIB=\"$(MULTILIB)\" +endif +ifneq 
(,$(LMULTILIB)) + TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" +endif + +############################################################################## +# Target system detection. +############################################################################## + +TARGET_SYS?= $(HOST_SYS) +ifeq (Windows,$(TARGET_SYS)) + TARGET_STRIP+= --strip-unneeded + TARGET_XSHLDFLAGS= -shared + TARGET_DYNXLDOPTS= +else + TARGET_AR+= 2>/dev/null +ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-stack-protector +endif +ifeq (Darwin,$(TARGET_SYS)) + ifeq (,$(MACOSX_DEPLOYMENT_TARGET)) + export MACOSX_DEPLOYMENT_TARGET=10.4 + endif + TARGET_STRIP+= -x + TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC + TARGET_DYNXLDOPTS= + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) + ifeq (x64,$(TARGET_LJARCH)) + TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 + TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 + endif +else +ifeq (iOS,$(TARGET_SYS)) + TARGET_STRIP+= -x + TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC + TARGET_DYNXLDOPTS= + TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) + ifeq (arm64,$(TARGET_LJARCH)) + TARGET_XCFLAGS+= -fno-omit-frame-pointer + endif +else + ifneq (SunOS,$(TARGET_SYS)) + ifneq (PS3,$(TARGET_SYS)) + TARGET_XLDFLAGS+= -Wl,-E + endif + endif + ifeq (Linux,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif + ifeq (GNU/kFreeBSD,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif +endif +endif +endif + +ifneq ($(HOST_SYS),$(TARGET_SYS)) + ifeq (Windows,$(TARGET_SYS)) + HOST_XCFLAGS+= -malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS + else + ifeq (Linux,$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX + else + ifeq 
(Darwin,$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + else + ifeq (iOS,$(TARGET_SYS)) + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + else + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER + endif + endif + endif + endif +endif + +ifneq (,$(CCDEBUG)) + TARGET_STRIP= @: +endif + +############################################################################## +# Files and pathnames. +############################################################################## + +MINILUA_O= host/minilua.o +MINILUA_LIBS= -lm +MINILUA_T= host/minilua +MINILUA_X= $(MINILUA_T) + +ifeq (,$(HOST_LUA)) + HOST_LUA= $(MINILUA_X) + DASM_DEP= $(MINILUA_T) +endif + +DASM_DIR= ../dynasm +DASM= $(HOST_LUA) $(DASM_DIR)/dynasm.lua +DASM_XFLAGS= +DASM_AFLAGS= +DASM_ARCH= $(TARGET_LJARCH) + +ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ENDIAN_LE +else + DASM_AFLAGS+= -D ENDIAN_BE +endif +ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D P64 +endif +ifneq (,$(findstring LJ_HASJIT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D JIT +endif +ifneq (,$(findstring LJ_HASFFI 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D FFI +endif +ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D DUALNUM +endif +ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D FPU + TARGET_ARCH+= -DLJ_ARCH_HASFPU=1 +else + TARGET_ARCH+= -DLJ_ARCH_HASFPU=0 +endif +ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D HFABI + TARGET_ARCH+= -DLJ_ABI_SOFTFP=0 +else + TARGET_ARCH+= -DLJ_ABI_SOFTFP=1 +endif +ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D NO_UNWIND + TARGET_ARCH+= -DLUAJIT_NO_UNWIND +endif +DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) +ifeq (Windows,$(TARGET_SYS)) + DASM_AFLAGS+= -D WIN +endif +ifeq (x64,$(TARGET_LJARCH)) + ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) + DASM_ARCH= x86 + 
endif +else +ifeq (arm,$(TARGET_LJARCH)) + ifeq (iOS,$(TARGET_SYS)) + DASM_AFLAGS+= -D IOS + endif +else +ifeq (ppc,$(TARGET_LJARCH)) + ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SQRT + endif + ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ROUND + endif + ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D GPR64 + endif + ifeq (PS3,$(TARGET_SYS)) + DASM_AFLAGS+= -D PPE -D TOC + endif + ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH))) + DASM_ARCH= ppc64 + endif +endif +endif +endif + +DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) +DASM_DASC= vm_$(DASM_ARCH).dasc + +BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ + host/buildvm_lib.o host/buildvm_fold.o +BUILDVM_T= host/buildvm +BUILDVM_X= $(BUILDVM_T) + +HOST_O= $(MINILUA_O) $(BUILDVM_O) +HOST_T= $(MINILUA_T) $(BUILDVM_T) + +LJVM_S= lj_vm.S +LJVM_O= lj_vm.o +LJVM_BOUT= $(LJVM_S) +LJVM_MODE= elfasm + +LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ + lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o +LJLIB_C= $(LJLIB_O:.o=.c) + +LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ + lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ + lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ + lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ + lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ + lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ + lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ + lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ + lj_asm.o lj_trace.o lj_gdbjit.o \ + lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ + lj_carith.o lj_clib.o lj_cparse.o \ + lj_lib.o lj_alloc.o lib_aux.o \ + $(LJLIB_O) lib_init.o + +LJVMCORE_O= $(LJVM_O) $(LJCORE_O) +LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) + +LIB_VMDEF= jit/vmdef.lua +LIB_VMDEFP= $(LIB_VMDEF) + +LUAJIT_O= luajit.o +LUAJIT_A= 
libluajit.a +LUAJIT_SO= libluajit.so +LUAJIT_T= luajit + +ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) +ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ + host/buildvm_arch.h +ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) +WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk +ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) + +############################################################################## +# Build mode handling. +############################################################################## + +# Mixed mode defaults. +TARGET_O= $(LUAJIT_A) +TARGET_T= $(LUAJIT_T) $(LUAJIT_SO) +TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO) + +ifeq (Windows,$(TARGET_SYS)) + TARGET_DYNCC= $(STATIC_CC) + LJVM_MODE= peobj + LJVM_BOUT= $(LJVM_O) + LUAJIT_T= luajit.exe + ifeq (cygwin,$(HOST_MSYS)) + LUAJIT_SO= cyg$(TARGET_DLLNAME) + else + LUAJIT_SO= $(TARGET_DLLNAME) + endif + # Mixed mode is not supported on Windows. And static mode doesn't work well. + # C modules cannot be loaded, because they bind to lua51.dll. 
+ ifneq (static,$(BUILDMODE)) + BUILDMODE= dynamic + TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL + endif +endif +ifeq (Darwin,$(TARGET_SYS)) + LJVM_MODE= machasm +endif +ifeq (iOS,$(TARGET_SYS)) + LJVM_MODE= machasm +endif +ifeq (SunOS,$(TARGET_SYS)) + BUILDMODE= static +endif +ifeq (PS3,$(TARGET_SYS)) + BUILDMODE= static +endif + +ifeq (Windows,$(HOST_SYS)) + MINILUA_T= host/minilua.exe + BUILDVM_T= host/buildvm.exe + ifeq (,$(HOST_MSYS)) + MINILUA_X= host\minilua + BUILDVM_X= host\buildvm + ALL_RM:= $(subst /,\,$(ALL_RM)) + endif +endif + +ifeq (static,$(BUILDMODE)) + TARGET_DYNCC= @: + TARGET_T= $(LUAJIT_T) + TARGET_DEP= $(LIB_VMDEF) +else +ifeq (dynamic,$(BUILDMODE)) + ifneq (Windows,$(TARGET_SYS)) + TARGET_CC= $(DYNAMIC_CC) + endif + TARGET_DYNCC= @: + LJVMCORE_DYNO= $(LJVMCORE_O) + TARGET_O= $(LUAJIT_SO) + TARGET_XLDFLAGS+= $(TARGET_DYNXLDOPTS) +else +ifeq (Darwin,$(TARGET_SYS)) + TARGET_DYNCC= @: + LJVMCORE_DYNO= $(LJVMCORE_O) +endif +ifeq (iOS,$(TARGET_SYS)) + TARGET_DYNCC= @: + LJVMCORE_DYNO= $(LJVMCORE_O) +endif +endif +endif + +Q= @ +E= @echo +#Q= +#E= @: + +############################################################################## +# Make targets. 
+############################################################################## + +default all: $(TARGET_T) + +amalg: + @grep "^[+|]" ljamalg.c + $(MAKE) all "LJCORE_O=ljamalg.o" + +clean: + $(HOST_RM) $(ALL_RM) + +libbc: + ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) + $(MAKE) all + +depend: + @for file in $(ALL_HDRGEN); do \ + test -f $$file || touch $$file; \ + done + @$(HOST_CC) $(HOST_ACFLAGS) -MM *.c host/*.c | \ + sed -e "s| [^ ]*/dasm_\S*\.h||g" \ + -e "s|^\([^l ]\)|host/\1|" \ + -e "s| lj_target_\S*\.h| lj_target_*.h|g" \ + -e "s| lj_emit_\S*\.h| lj_emit_*.h|g" \ + -e "s| lj_asm_\S*\.h| lj_asm_*.h|g" >Makefile.dep + @for file in $(ALL_HDRGEN); do \ + test -s $$file || $(HOST_RM) $$file; \ + done + +.PHONY: default all amalg clean libbc depend + +############################################################################## +# Rules for generated files. +############################################################################## + +$(MINILUA_T): $(MINILUA_O) + $(E) "HOSTLINK $@" + $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) + +host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua + $(E) "DYNASM $@" + $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) + +host/buildvm.o: $(DASM_DIR)/dasm_*.h + +$(BUILDVM_T): $(BUILDVM_O) + $(E) "HOSTLINK $@" + $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(BUILDVM_O) $(HOST_ALIBS) + +$(LJVM_BOUT): $(BUILDVM_T) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m $(LJVM_MODE) -o $@ + +lj_bcdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m bcdef -o $@ $(LJLIB_C) + +lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m ffdef -o $@ $(LJLIB_C) + +lj_libdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m libdef -o $@ $(LJLIB_C) + +lj_recdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m recdef -o $@ $(LJLIB_C) + +$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m 
vmdef -o $(LIB_VMDEFP) $(LJLIB_C) + +lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c + $(E) "BUILDVM $@" + $(Q)$(BUILDVM_X) -m folddef -o $@ lj_opt_fold.c + +############################################################################## +# Object file rules. +############################################################################## + +%.o: %.c + $(E) "CC $@" + $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< + +%.o: %.S + $(E) "ASM $@" + $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $< + +$(LUAJIT_O): + $(E) "CC $@" + $(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ $< + +$(HOST_O): %.o: %.c + $(E) "HOSTCC $@" + $(Q)$(HOST_CC) $(HOST_ACFLAGS) -c -o $@ $< + +include Makefile.dep + +############################################################################## +# Target file rules. +############################################################################## + +$(LUAJIT_A): $(LJVMCORE_O) + $(E) "AR $@" + $(Q)$(TARGET_AR) $@ $(LJVMCORE_O) + +# The dependency on _O, but linking with _DYNO is intentional. 
+$(LUAJIT_SO): $(LJVMCORE_O) + $(E) "DYNLINK $@" + $(Q)$(TARGET_LD) $(TARGET_ASHLDFLAGS) -o $@ $(LJVMCORE_DYNO) $(TARGET_ALIBS) + $(Q)$(TARGET_STRIP) $@ + +$(LUAJIT_T): $(TARGET_O) $(LUAJIT_O) $(TARGET_DEP) + $(E) "LINK $@" + $(Q)$(TARGET_LD) $(TARGET_ALDFLAGS) -o $@ $(LUAJIT_O) $(TARGET_O) $(TARGET_ALIBS) + $(Q)$(TARGET_STRIP) $@ + $(E) "OK Successfully built LuaJIT" + +############################################################################## diff --git a/lib/LuaJIT/Makefile.dep b/lib/LuaJIT/src/Makefile.dep index 2b1cb5e..2b1cb5e 100644 --- a/lib/LuaJIT/Makefile.dep +++ b/lib/LuaJIT/src/Makefile.dep diff --git a/lib/LuaJIT/host/.gitignore b/lib/LuaJIT/src/host/.gitignore index 762ac2a..762ac2a 100644 --- a/lib/LuaJIT/host/.gitignore +++ b/lib/LuaJIT/src/host/.gitignore diff --git a/lib/LuaJIT/host/README b/lib/LuaJIT/src/host/README index abfcdaa..abfcdaa 100644 --- a/lib/LuaJIT/host/README +++ b/lib/LuaJIT/src/host/README diff --git a/lib/LuaJIT/host/buildvm.c b/lib/LuaJIT/src/host/buildvm.c index de23fab..de23fab 100644 --- a/lib/LuaJIT/host/buildvm.c +++ b/lib/LuaJIT/src/host/buildvm.c diff --git a/lib/LuaJIT/host/buildvm.h b/lib/LuaJIT/src/host/buildvm.h index b90428d..b90428d 100644 --- a/lib/LuaJIT/host/buildvm.h +++ b/lib/LuaJIT/src/host/buildvm.h diff --git a/lib/LuaJIT/host/buildvm_asm.c b/lib/LuaJIT/src/host/buildvm_asm.c index 43595b3..43595b3 100644 --- a/lib/LuaJIT/host/buildvm_asm.c +++ b/lib/LuaJIT/src/host/buildvm_asm.c diff --git a/lib/LuaJIT/host/buildvm_fold.c b/lib/LuaJIT/src/host/buildvm_fold.c index d579f4d..d579f4d 100644 --- a/lib/LuaJIT/host/buildvm_fold.c +++ b/lib/LuaJIT/src/host/buildvm_fold.c diff --git a/lib/LuaJIT/host/buildvm_lib.c b/lib/LuaJIT/src/host/buildvm_lib.c index 2956fdb..2956fdb 100644 --- a/lib/LuaJIT/host/buildvm_lib.c +++ b/lib/LuaJIT/src/host/buildvm_lib.c diff --git a/lib/LuaJIT/host/buildvm_libbc.h b/lib/LuaJIT/src/host/buildvm_libbc.h index b2600bd..b2600bd 100644 --- a/lib/LuaJIT/host/buildvm_libbc.h +++ 
b/lib/LuaJIT/src/host/buildvm_libbc.h diff --git a/lib/LuaJIT/host/buildvm_peobj.c b/lib/LuaJIT/src/host/buildvm_peobj.c index 2eb2bb7..2eb2bb7 100644 --- a/lib/LuaJIT/host/buildvm_peobj.c +++ b/lib/LuaJIT/src/host/buildvm_peobj.c diff --git a/lib/LuaJIT/host/genlibbc.lua b/lib/LuaJIT/src/host/genlibbc.lua index 6f5a05c..6f5a05c 100644 --- a/lib/LuaJIT/host/genlibbc.lua +++ b/lib/LuaJIT/src/host/genlibbc.lua diff --git a/lib/LuaJIT/host/genminilua.lua b/lib/LuaJIT/src/host/genminilua.lua index 50feff0..50feff0 100644 --- a/lib/LuaJIT/host/genminilua.lua +++ b/lib/LuaJIT/src/host/genminilua.lua diff --git a/lib/LuaJIT/host/minilua.c b/lib/LuaJIT/src/host/minilua.c index 7915028..7915028 100644 --- a/lib/LuaJIT/host/minilua.c +++ b/lib/LuaJIT/src/host/minilua.c diff --git a/lib/LuaJIT/jit/.gitignore b/lib/LuaJIT/src/jit/.gitignore index 500e285..500e285 100644 --- a/lib/LuaJIT/jit/.gitignore +++ b/lib/LuaJIT/src/jit/.gitignore diff --git a/lib/LuaJIT/jit/bc.lua b/lib/LuaJIT/src/jit/bc.lua index 193cf01..193cf01 100644 --- a/lib/LuaJIT/jit/bc.lua +++ b/lib/LuaJIT/src/jit/bc.lua diff --git a/lib/LuaJIT/jit/bcsave.lua b/lib/LuaJIT/src/jit/bcsave.lua index 2553d97..2553d97 100644 --- a/lib/LuaJIT/jit/bcsave.lua +++ b/lib/LuaJIT/src/jit/bcsave.lua diff --git a/lib/LuaJIT/jit/dis_arm.lua b/lib/LuaJIT/src/jit/dis_arm.lua index c2dd776..c2dd776 100644 --- a/lib/LuaJIT/jit/dis_arm.lua +++ b/lib/LuaJIT/src/jit/dis_arm.lua diff --git a/lib/LuaJIT/jit/dis_arm64.lua b/lib/LuaJIT/src/jit/dis_arm64.lua index a717332..a717332 100644 --- a/lib/LuaJIT/jit/dis_arm64.lua +++ b/lib/LuaJIT/src/jit/dis_arm64.lua diff --git a/lib/LuaJIT/jit/dis_arm64be.lua b/lib/LuaJIT/src/jit/dis_arm64be.lua index 7eb389e..7eb389e 100644 --- a/lib/LuaJIT/jit/dis_arm64be.lua +++ b/lib/LuaJIT/src/jit/dis_arm64be.lua diff --git a/lib/LuaJIT/jit/dis_mips.lua b/lib/LuaJIT/src/jit/dis_mips.lua index a12b8e6..a12b8e6 100644 --- a/lib/LuaJIT/jit/dis_mips.lua +++ b/lib/LuaJIT/src/jit/dis_mips.lua diff --git 
a/lib/LuaJIT/jit/dis_mips64.lua b/lib/LuaJIT/src/jit/dis_mips64.lua index c437492..c437492 100644 --- a/lib/LuaJIT/jit/dis_mips64.lua +++ b/lib/LuaJIT/src/jit/dis_mips64.lua diff --git a/lib/LuaJIT/jit/dis_mips64el.lua b/lib/LuaJIT/src/jit/dis_mips64el.lua index 2b1470a..2b1470a 100644 --- a/lib/LuaJIT/jit/dis_mips64el.lua +++ b/lib/LuaJIT/src/jit/dis_mips64el.lua diff --git a/lib/LuaJIT/jit/dis_mipsel.lua b/lib/LuaJIT/src/jit/dis_mipsel.lua index f69b11f..f69b11f 100644 --- a/lib/LuaJIT/jit/dis_mipsel.lua +++ b/lib/LuaJIT/src/jit/dis_mipsel.lua diff --git a/lib/LuaJIT/jit/dis_ppc.lua b/lib/LuaJIT/src/jit/dis_ppc.lua index 2aeb1b2..2aeb1b2 100644 --- a/lib/LuaJIT/jit/dis_ppc.lua +++ b/lib/LuaJIT/src/jit/dis_ppc.lua diff --git a/lib/LuaJIT/jit/dis_x64.lua b/lib/LuaJIT/src/jit/dis_x64.lua index d5714ee..d5714ee 100644 --- a/lib/LuaJIT/jit/dis_x64.lua +++ b/lib/LuaJIT/src/jit/dis_x64.lua diff --git a/lib/LuaJIT/jit/dis_x86.lua b/lib/LuaJIT/src/jit/dis_x86.lua index 3a68c93..3a68c93 100644 --- a/lib/LuaJIT/jit/dis_x86.lua +++ b/lib/LuaJIT/src/jit/dis_x86.lua diff --git a/lib/LuaJIT/jit/dump.lua b/lib/LuaJIT/src/jit/dump.lua index 2bea652..2bea652 100644 --- a/lib/LuaJIT/jit/dump.lua +++ b/lib/LuaJIT/src/jit/dump.lua diff --git a/lib/LuaJIT/jit/p.lua b/lib/LuaJIT/src/jit/p.lua index 7be1058..7be1058 100644 --- a/lib/LuaJIT/jit/p.lua +++ b/lib/LuaJIT/src/jit/p.lua diff --git a/lib/LuaJIT/jit/v.lua b/lib/LuaJIT/src/jit/v.lua index 934de98..934de98 100644 --- a/lib/LuaJIT/jit/v.lua +++ b/lib/LuaJIT/src/jit/v.lua diff --git a/lib/LuaJIT/jit/zone.lua b/lib/LuaJIT/src/jit/zone.lua index fa702c4..fa702c4 100644 --- a/lib/LuaJIT/jit/zone.lua +++ b/lib/LuaJIT/src/jit/zone.lua diff --git a/lib/LuaJIT/lauxlib.h b/lib/LuaJIT/src/lauxlib.h index a44f027..a44f027 100644 --- a/lib/LuaJIT/lauxlib.h +++ b/lib/LuaJIT/src/lauxlib.h diff --git a/lib/LuaJIT/lib_aux.c b/lib/LuaJIT/src/lib_aux.c index 2682a38..2682a38 100644 --- a/lib/LuaJIT/lib_aux.c +++ b/lib/LuaJIT/src/lib_aux.c diff --git 
a/lib/LuaJIT/lib_base.c b/lib/LuaJIT/src/lib_base.c index 1cd8305..1cd8305 100644 --- a/lib/LuaJIT/lib_base.c +++ b/lib/LuaJIT/src/lib_base.c diff --git a/lib/LuaJIT/lib_bit.c b/lib/LuaJIT/src/lib_bit.c index c979a44..c979a44 100644 --- a/lib/LuaJIT/lib_bit.c +++ b/lib/LuaJIT/src/lib_bit.c diff --git a/lib/LuaJIT/lib_debug.c b/lib/LuaJIT/src/lib_debug.c index f112b5b..f112b5b 100644 --- a/lib/LuaJIT/lib_debug.c +++ b/lib/LuaJIT/src/lib_debug.c diff --git a/lib/LuaJIT/lib_ffi.c b/lib/LuaJIT/src/lib_ffi.c index 8032411..8032411 100644 --- a/lib/LuaJIT/lib_ffi.c +++ b/lib/LuaJIT/src/lib_ffi.c diff --git a/lib/LuaJIT/lib_init.c b/lib/LuaJIT/src/lib_init.c index 2ed370e..2ed370e 100644 --- a/lib/LuaJIT/lib_init.c +++ b/lib/LuaJIT/src/lib_init.c diff --git a/lib/LuaJIT/lib_io.c b/lib/LuaJIT/src/lib_io.c index 73fd932..73fd932 100644 --- a/lib/LuaJIT/lib_io.c +++ b/lib/LuaJIT/src/lib_io.c diff --git a/lib/LuaJIT/lib_jit.c b/lib/LuaJIT/src/lib_jit.c index 22ca0a1..22ca0a1 100644 --- a/lib/LuaJIT/lib_jit.c +++ b/lib/LuaJIT/src/lib_jit.c diff --git a/lib/LuaJIT/lib_math.c b/lib/LuaJIT/src/lib_math.c index ef9dda2..ef9dda2 100644 --- a/lib/LuaJIT/lib_math.c +++ b/lib/LuaJIT/src/lib_math.c diff --git a/lib/LuaJIT/lib_os.c b/lib/LuaJIT/src/lib_os.c index ffbc3fd..ffbc3fd 100644 --- a/lib/LuaJIT/lib_os.c +++ b/lib/LuaJIT/src/lib_os.c diff --git a/lib/LuaJIT/lib_package.c b/lib/LuaJIT/src/lib_package.c index bedd6d7..bedd6d7 100644 --- a/lib/LuaJIT/lib_package.c +++ b/lib/LuaJIT/src/lib_package.c diff --git a/lib/LuaJIT/lib_string.c b/lib/LuaJIT/src/lib_string.c index 76b0730..76b0730 100644 --- a/lib/LuaJIT/lib_string.c +++ b/lib/LuaJIT/src/lib_string.c diff --git a/lib/LuaJIT/lib_table.c b/lib/LuaJIT/src/lib_table.c index 0450f1f..0450f1f 100644 --- a/lib/LuaJIT/lib_table.c +++ b/lib/LuaJIT/src/lib_table.c diff --git a/lib/LuaJIT/lj.supp b/lib/LuaJIT/src/lj.supp index 217f7c8..217f7c8 100644 --- a/lib/LuaJIT/lj.supp +++ b/lib/LuaJIT/src/lj.supp diff --git 
a/lib/LuaJIT/lj_alloc.c b/lib/LuaJIT/src/lj_alloc.c index 33a2eb8..33a2eb8 100644 --- a/lib/LuaJIT/lj_alloc.c +++ b/lib/LuaJIT/src/lj_alloc.c diff --git a/lib/LuaJIT/lj_alloc.h b/lib/LuaJIT/src/lj_alloc.h index f87a7cf..f87a7cf 100644 --- a/lib/LuaJIT/lj_alloc.h +++ b/lib/LuaJIT/src/lj_alloc.h diff --git a/lib/LuaJIT/lj_api.c b/lib/LuaJIT/src/lj_api.c index d17a575..d17a575 100644 --- a/lib/LuaJIT/lj_api.c +++ b/lib/LuaJIT/src/lj_api.c diff --git a/lib/LuaJIT/lj_arch.h b/lib/LuaJIT/src/lj_arch.h index 31a1159..31a1159 100644 --- a/lib/LuaJIT/lj_arch.h +++ b/lib/LuaJIT/src/lj_arch.h diff --git a/lib/LuaJIT/lj_asm.c b/lib/LuaJIT/src/lj_asm.c index 992dcf5..992dcf5 100644 --- a/lib/LuaJIT/lj_asm.c +++ b/lib/LuaJIT/src/lj_asm.c diff --git a/lib/LuaJIT/lj_asm.h b/lib/LuaJIT/src/lj_asm.h index 2819481..2819481 100644 --- a/lib/LuaJIT/lj_asm.h +++ b/lib/LuaJIT/src/lj_asm.h diff --git a/lib/LuaJIT/lj_asm_arm.h b/lib/LuaJIT/src/lj_asm_arm.h index 37bfa40..37bfa40 100644 --- a/lib/LuaJIT/lj_asm_arm.h +++ b/lib/LuaJIT/src/lj_asm_arm.h diff --git a/lib/LuaJIT/lj_asm_arm64.h b/lib/LuaJIT/src/lj_asm_arm64.h index baafa21..baafa21 100644 --- a/lib/LuaJIT/lj_asm_arm64.h +++ b/lib/LuaJIT/src/lj_asm_arm64.h diff --git a/lib/LuaJIT/lj_asm_mips.h b/lib/LuaJIT/src/lj_asm_mips.h index 3a4679b..3a4679b 100644 --- a/lib/LuaJIT/lj_asm_mips.h +++ b/lib/LuaJIT/src/lj_asm_mips.h diff --git a/lib/LuaJIT/lj_asm_ppc.h b/lib/LuaJIT/src/lj_asm_ppc.h index 1955429..1955429 100644 --- a/lib/LuaJIT/lj_asm_ppc.h +++ b/lib/LuaJIT/src/lj_asm_ppc.h diff --git a/lib/LuaJIT/lj_asm_x86.h b/lib/LuaJIT/src/lj_asm_x86.h index af54dc7..af54dc7 100644 --- a/lib/LuaJIT/lj_asm_x86.h +++ b/lib/LuaJIT/src/lj_asm_x86.h diff --git a/lib/LuaJIT/lj_bc.c b/lib/LuaJIT/src/lj_bc.c index a597692..a597692 100644 --- a/lib/LuaJIT/lj_bc.c +++ b/lib/LuaJIT/src/lj_bc.c diff --git a/lib/LuaJIT/lj_bc.h b/lib/LuaJIT/src/lj_bc.h index 69a45f2..69a45f2 100644 --- a/lib/LuaJIT/lj_bc.h +++ b/lib/LuaJIT/src/lj_bc.h diff --git 
a/lib/LuaJIT/lj_bcdump.h b/lib/LuaJIT/src/lj_bcdump.h index fdfc6ec..fdfc6ec 100644 --- a/lib/LuaJIT/lj_bcdump.h +++ b/lib/LuaJIT/src/lj_bcdump.h diff --git a/lib/LuaJIT/lj_bcread.c b/lib/LuaJIT/src/lj_bcread.c index 48c5e7c..48c5e7c 100644 --- a/lib/LuaJIT/lj_bcread.c +++ b/lib/LuaJIT/src/lj_bcread.c diff --git a/lib/LuaJIT/lj_bcwrite.c b/lib/LuaJIT/src/lj_bcwrite.c index 5e05cae..5e05cae 100644 --- a/lib/LuaJIT/lj_bcwrite.c +++ b/lib/LuaJIT/src/lj_bcwrite.c diff --git a/lib/LuaJIT/lj_buf.c b/lib/LuaJIT/src/lj_buf.c index 0dfe7f9..0dfe7f9 100644 --- a/lib/LuaJIT/lj_buf.c +++ b/lib/LuaJIT/src/lj_buf.c diff --git a/lib/LuaJIT/lj_buf.h b/lib/LuaJIT/src/lj_buf.h index a405169..a405169 100644 --- a/lib/LuaJIT/lj_buf.h +++ b/lib/LuaJIT/src/lj_buf.h diff --git a/lib/LuaJIT/lj_carith.c b/lib/LuaJIT/src/lj_carith.c index 1c050eb..1c050eb 100644 --- a/lib/LuaJIT/lj_carith.c +++ b/lib/LuaJIT/src/lj_carith.c diff --git a/lib/LuaJIT/lj_carith.h b/lib/LuaJIT/src/lj_carith.h index 3b0a5dd..3b0a5dd 100644 --- a/lib/LuaJIT/lj_carith.h +++ b/lib/LuaJIT/src/lj_carith.h diff --git a/lib/LuaJIT/lj_ccall.c b/lib/LuaJIT/src/lj_ccall.c index 25e938c..25e938c 100644 --- a/lib/LuaJIT/lj_ccall.c +++ b/lib/LuaJIT/src/lj_ccall.c diff --git a/lib/LuaJIT/lj_ccall.h b/lib/LuaJIT/src/lj_ccall.h index 6efa48c..6efa48c 100644 --- a/lib/LuaJIT/lj_ccall.h +++ b/lib/LuaJIT/src/lj_ccall.h diff --git a/lib/LuaJIT/lj_ccallback.c b/lib/LuaJIT/src/lj_ccallback.c index 412dbf8..412dbf8 100644 --- a/lib/LuaJIT/lj_ccallback.c +++ b/lib/LuaJIT/src/lj_ccallback.c diff --git a/lib/LuaJIT/lj_ccallback.h b/lib/LuaJIT/src/lj_ccallback.h index a8cdad3..a8cdad3 100644 --- a/lib/LuaJIT/lj_ccallback.h +++ b/lib/LuaJIT/src/lj_ccallback.h diff --git a/lib/LuaJIT/lj_cconv.c b/lib/LuaJIT/src/lj_cconv.c index 13b8230..13b8230 100644 --- a/lib/LuaJIT/lj_cconv.c +++ b/lib/LuaJIT/src/lj_cconv.c diff --git a/lib/LuaJIT/lj_cconv.h b/lib/LuaJIT/src/lj_cconv.h index 0a0b66c..0a0b66c 100644 --- a/lib/LuaJIT/lj_cconv.h +++ 
b/lib/LuaJIT/src/lj_cconv.h diff --git a/lib/LuaJIT/lj_cdata.c b/lib/LuaJIT/src/lj_cdata.c index 68e16d7..68e16d7 100644 --- a/lib/LuaJIT/lj_cdata.c +++ b/lib/LuaJIT/src/lj_cdata.c diff --git a/lib/LuaJIT/lj_cdata.h b/lib/LuaJIT/src/lj_cdata.h index 5bb0f5d..5bb0f5d 100644 --- a/lib/LuaJIT/lj_cdata.h +++ b/lib/LuaJIT/src/lj_cdata.h diff --git a/lib/LuaJIT/lj_char.c b/lib/LuaJIT/src/lj_char.c index 11f23ef..11f23ef 100644 --- a/lib/LuaJIT/lj_char.c +++ b/lib/LuaJIT/src/lj_char.c diff --git a/lib/LuaJIT/lj_char.h b/lib/LuaJIT/src/lj_char.h index c3c86d3..c3c86d3 100644 --- a/lib/LuaJIT/lj_char.h +++ b/lib/LuaJIT/src/lj_char.h diff --git a/lib/LuaJIT/lj_clib.c b/lib/LuaJIT/src/lj_clib.c index f016b06..f016b06 100644 --- a/lib/LuaJIT/lj_clib.c +++ b/lib/LuaJIT/src/lj_clib.c diff --git a/lib/LuaJIT/lj_clib.h b/lib/LuaJIT/src/lj_clib.h index fcc9dac..fcc9dac 100644 --- a/lib/LuaJIT/lj_clib.h +++ b/lib/LuaJIT/src/lj_clib.h diff --git a/lib/LuaJIT/lj_cparse.c b/lib/LuaJIT/src/lj_cparse.c index 19f632f..19f632f 100644 --- a/lib/LuaJIT/lj_cparse.c +++ b/lib/LuaJIT/src/lj_cparse.c diff --git a/lib/LuaJIT/lj_cparse.h b/lib/LuaJIT/src/lj_cparse.h index bad1060..bad1060 100644 --- a/lib/LuaJIT/lj_cparse.h +++ b/lib/LuaJIT/src/lj_cparse.h diff --git a/lib/LuaJIT/lj_crecord.c b/lib/LuaJIT/src/lj_crecord.c index d425686..d425686 100644 --- a/lib/LuaJIT/lj_crecord.c +++ b/lib/LuaJIT/src/lj_crecord.c diff --git a/lib/LuaJIT/lj_crecord.h b/lib/LuaJIT/src/lj_crecord.h index c165def..c165def 100644 --- a/lib/LuaJIT/lj_crecord.h +++ b/lib/LuaJIT/src/lj_crecord.h diff --git a/lib/LuaJIT/lj_ctype.c b/lib/LuaJIT/src/lj_ctype.c index 0ea89c7..0ea89c7 100644 --- a/lib/LuaJIT/lj_ctype.c +++ b/lib/LuaJIT/src/lj_ctype.c diff --git a/lib/LuaJIT/lj_ctype.h b/lib/LuaJIT/src/lj_ctype.h index 0c220a8..0c220a8 100644 --- a/lib/LuaJIT/lj_ctype.h +++ b/lib/LuaJIT/src/lj_ctype.h diff --git a/lib/LuaJIT/lj_debug.c b/lib/LuaJIT/src/lj_debug.c index 959dc28..959dc28 100644 --- a/lib/LuaJIT/lj_debug.c +++ 
b/lib/LuaJIT/src/lj_debug.c diff --git a/lib/LuaJIT/lj_debug.h b/lib/LuaJIT/src/lj_debug.h index 5917c00..5917c00 100644 --- a/lib/LuaJIT/lj_debug.h +++ b/lib/LuaJIT/src/lj_debug.h diff --git a/lib/LuaJIT/lj_def.h b/lib/LuaJIT/src/lj_def.h index e67bb24..e67bb24 100644 --- a/lib/LuaJIT/lj_def.h +++ b/lib/LuaJIT/src/lj_def.h diff --git a/lib/LuaJIT/lj_dispatch.c b/lib/LuaJIT/src/lj_dispatch.c index 5d6795f..5d6795f 100644 --- a/lib/LuaJIT/lj_dispatch.c +++ b/lib/LuaJIT/src/lj_dispatch.c diff --git a/lib/LuaJIT/lj_dispatch.h b/lib/LuaJIT/src/lj_dispatch.h index 5bda51a..5bda51a 100644 --- a/lib/LuaJIT/lj_dispatch.h +++ b/lib/LuaJIT/src/lj_dispatch.h diff --git a/lib/LuaJIT/lj_emit_arm.h b/lib/LuaJIT/src/lj_emit_arm.h index dee8bdc..dee8bdc 100644 --- a/lib/LuaJIT/lj_emit_arm.h +++ b/lib/LuaJIT/src/lj_emit_arm.h diff --git a/lib/LuaJIT/lj_emit_arm64.h b/lib/LuaJIT/src/lj_emit_arm64.h index 1001b1d..1001b1d 100644 --- a/lib/LuaJIT/lj_emit_arm64.h +++ b/lib/LuaJIT/src/lj_emit_arm64.h diff --git a/lib/LuaJIT/lj_emit_mips.h b/lib/LuaJIT/src/lj_emit_mips.h index bb6593a..bb6593a 100644 --- a/lib/LuaJIT/lj_emit_mips.h +++ b/lib/LuaJIT/src/lj_emit_mips.h diff --git a/lib/LuaJIT/lj_emit_ppc.h b/lib/LuaJIT/src/lj_emit_ppc.h index 21c3c2a..21c3c2a 100644 --- a/lib/LuaJIT/lj_emit_ppc.h +++ b/lib/LuaJIT/src/lj_emit_ppc.h diff --git a/lib/LuaJIT/lj_emit_x86.h b/lib/LuaJIT/src/lj_emit_x86.h index b3dc4ea..b3dc4ea 100644 --- a/lib/LuaJIT/lj_emit_x86.h +++ b/lib/LuaJIT/src/lj_emit_x86.h diff --git a/lib/LuaJIT/lj_err.c b/lib/LuaJIT/src/lj_err.c index abf176e..abf176e 100644 --- a/lib/LuaJIT/lj_err.c +++ b/lib/LuaJIT/src/lj_err.c diff --git a/lib/LuaJIT/lj_err.h b/lib/LuaJIT/src/lj_err.h index cba5fb7..cba5fb7 100644 --- a/lib/LuaJIT/lj_err.h +++ b/lib/LuaJIT/src/lj_err.h diff --git a/lib/LuaJIT/lj_errmsg.h b/lib/LuaJIT/src/lj_errmsg.h index 060a9f8..060a9f8 100644 --- a/lib/LuaJIT/lj_errmsg.h +++ b/lib/LuaJIT/src/lj_errmsg.h diff --git a/lib/LuaJIT/lj_ff.h b/lib/LuaJIT/src/lj_ff.h 
index 31d65a0..31d65a0 100644 --- a/lib/LuaJIT/lj_ff.h +++ b/lib/LuaJIT/src/lj_ff.h diff --git a/lib/LuaJIT/lj_ffrecord.c b/lib/LuaJIT/src/lj_ffrecord.c index 849d7a2..849d7a2 100644 --- a/lib/LuaJIT/lj_ffrecord.c +++ b/lib/LuaJIT/src/lj_ffrecord.c diff --git a/lib/LuaJIT/lj_ffrecord.h b/lib/LuaJIT/src/lj_ffrecord.h index 3b40745..3b40745 100644 --- a/lib/LuaJIT/lj_ffrecord.h +++ b/lib/LuaJIT/src/lj_ffrecord.h diff --git a/lib/LuaJIT/lj_frame.h b/lib/LuaJIT/src/lj_frame.h index 04cb5a3..04cb5a3 100644 --- a/lib/LuaJIT/lj_frame.h +++ b/lib/LuaJIT/src/lj_frame.h diff --git a/lib/LuaJIT/lj_func.c b/lib/LuaJIT/src/lj_func.c index 639dad8..639dad8 100644 --- a/lib/LuaJIT/lj_func.c +++ b/lib/LuaJIT/src/lj_func.c diff --git a/lib/LuaJIT/lj_func.h b/lib/LuaJIT/src/lj_func.h index 901751b..901751b 100644 --- a/lib/LuaJIT/lj_func.h +++ b/lib/LuaJIT/src/lj_func.h diff --git a/lib/LuaJIT/lj_gc.c b/lib/LuaJIT/src/lj_gc.c index 2aaf5b2..2aaf5b2 100644 --- a/lib/LuaJIT/lj_gc.c +++ b/lib/LuaJIT/src/lj_gc.c diff --git a/lib/LuaJIT/lj_gc.h b/lib/LuaJIT/src/lj_gc.h index 669bbe9..669bbe9 100644 --- a/lib/LuaJIT/lj_gc.h +++ b/lib/LuaJIT/src/lj_gc.h diff --git a/lib/LuaJIT/lj_gdbjit.c b/lib/LuaJIT/src/lj_gdbjit.c index c219ffa..c219ffa 100644 --- a/lib/LuaJIT/lj_gdbjit.c +++ b/lib/LuaJIT/src/lj_gdbjit.c diff --git a/lib/LuaJIT/lj_gdbjit.h b/lib/LuaJIT/src/lj_gdbjit.h index bbaa156..bbaa156 100644 --- a/lib/LuaJIT/lj_gdbjit.h +++ b/lib/LuaJIT/src/lj_gdbjit.h diff --git a/lib/LuaJIT/lj_ir.c b/lib/LuaJIT/src/lj_ir.c index 5baece6..5baece6 100644 --- a/lib/LuaJIT/lj_ir.c +++ b/lib/LuaJIT/src/lj_ir.c diff --git a/lib/LuaJIT/lj_ir.h b/lib/LuaJIT/src/lj_ir.h index 8057a75..8057a75 100644 --- a/lib/LuaJIT/lj_ir.h +++ b/lib/LuaJIT/src/lj_ir.h diff --git a/lib/LuaJIT/lj_ircall.h b/lib/LuaJIT/src/lj_ircall.h index 9b3883b..9b3883b 100644 --- a/lib/LuaJIT/lj_ircall.h +++ b/lib/LuaJIT/src/lj_ircall.h diff --git a/lib/LuaJIT/lj_iropt.h b/lib/LuaJIT/src/lj_iropt.h index a59ba3f..a59ba3f 100644 --- 
a/lib/LuaJIT/lj_iropt.h +++ b/lib/LuaJIT/src/lj_iropt.h diff --git a/lib/LuaJIT/lj_jit.h b/lib/LuaJIT/src/lj_jit.h index 5d41ef4..5d41ef4 100644 --- a/lib/LuaJIT/lj_jit.h +++ b/lib/LuaJIT/src/lj_jit.h diff --git a/lib/LuaJIT/lj_lex.c b/lib/LuaJIT/src/lj_lex.c index 2d2f819..2d2f819 100644 --- a/lib/LuaJIT/lj_lex.c +++ b/lib/LuaJIT/src/lj_lex.c diff --git a/lib/LuaJIT/lj_lex.h b/lib/LuaJIT/src/lj_lex.h index 33fa865..33fa865 100644 --- a/lib/LuaJIT/lj_lex.h +++ b/lib/LuaJIT/src/lj_lex.h diff --git a/lib/LuaJIT/lj_lib.c b/lib/LuaJIT/src/lj_lib.c index b8638de..b8638de 100644 --- a/lib/LuaJIT/lj_lib.c +++ b/lib/LuaJIT/src/lj_lib.c diff --git a/lib/LuaJIT/lj_lib.h b/lib/LuaJIT/src/lj_lib.h index 37ec9d7..37ec9d7 100644 --- a/lib/LuaJIT/lj_lib.h +++ b/lib/LuaJIT/src/lj_lib.h diff --git a/lib/LuaJIT/lj_load.c b/lib/LuaJIT/src/lj_load.c index 9a31d9a..9a31d9a 100644 --- a/lib/LuaJIT/lj_load.c +++ b/lib/LuaJIT/src/lj_load.c diff --git a/lib/LuaJIT/lj_mcode.c b/lib/LuaJIT/src/lj_mcode.c index 64b0ca9..64b0ca9 100644 --- a/lib/LuaJIT/lj_mcode.c +++ b/lib/LuaJIT/src/lj_mcode.c diff --git a/lib/LuaJIT/lj_mcode.h b/lib/LuaJIT/src/lj_mcode.h index f0847e9..f0847e9 100644 --- a/lib/LuaJIT/lj_mcode.h +++ b/lib/LuaJIT/src/lj_mcode.h diff --git a/lib/LuaJIT/lj_meta.c b/lib/LuaJIT/src/lj_meta.c index 0bd4d84..0bd4d84 100644 --- a/lib/LuaJIT/lj_meta.c +++ b/lib/LuaJIT/src/lj_meta.c diff --git a/lib/LuaJIT/lj_meta.h b/lib/LuaJIT/src/lj_meta.h index 73b4572..73b4572 100644 --- a/lib/LuaJIT/lj_meta.h +++ b/lib/LuaJIT/src/lj_meta.h diff --git a/lib/LuaJIT/lj_obj.c b/lib/LuaJIT/src/lj_obj.c index ee33aeb..ee33aeb 100644 --- a/lib/LuaJIT/lj_obj.c +++ b/lib/LuaJIT/src/lj_obj.c diff --git a/lib/LuaJIT/lj_obj.h b/lib/LuaJIT/src/lj_obj.h index 72b7ace..72b7ace 100644 --- a/lib/LuaJIT/lj_obj.h +++ b/lib/LuaJIT/src/lj_obj.h diff --git a/lib/LuaJIT/lj_opt_dce.c b/lib/LuaJIT/src/lj_opt_dce.c index 2417f32..2417f32 100644 --- a/lib/LuaJIT/lj_opt_dce.c +++ b/lib/LuaJIT/src/lj_opt_dce.c diff --git 
a/lib/LuaJIT/lj_opt_fold.c b/lib/LuaJIT/src/lj_opt_fold.c index 9873b47..9873b47 100644 --- a/lib/LuaJIT/lj_opt_fold.c +++ b/lib/LuaJIT/src/lj_opt_fold.c diff --git a/lib/LuaJIT/lj_opt_loop.c b/lib/LuaJIT/src/lj_opt_loop.c index 441b8ad..441b8ad 100644 --- a/lib/LuaJIT/lj_opt_loop.c +++ b/lib/LuaJIT/src/lj_opt_loop.c diff --git a/lib/LuaJIT/lj_opt_mem.c b/lib/LuaJIT/src/lj_opt_mem.c index cc177d3..cc177d3 100644 --- a/lib/LuaJIT/lj_opt_mem.c +++ b/lib/LuaJIT/src/lj_opt_mem.c diff --git a/lib/LuaJIT/lj_opt_narrow.c b/lib/LuaJIT/src/lj_opt_narrow.c index cd96ca4..cd96ca4 100644 --- a/lib/LuaJIT/lj_opt_narrow.c +++ b/lib/LuaJIT/src/lj_opt_narrow.c diff --git a/lib/LuaJIT/lj_opt_sink.c b/lib/LuaJIT/src/lj_opt_sink.c index c16363e..c16363e 100644 --- a/lib/LuaJIT/lj_opt_sink.c +++ b/lib/LuaJIT/src/lj_opt_sink.c diff --git a/lib/LuaJIT/lj_opt_split.c b/lib/LuaJIT/src/lj_opt_split.c index 79ac3cc..79ac3cc 100644 --- a/lib/LuaJIT/lj_opt_split.c +++ b/lib/LuaJIT/src/lj_opt_split.c diff --git a/lib/LuaJIT/lj_parse.c b/lib/LuaJIT/src/lj_parse.c index c8efafa..c8efafa 100644 --- a/lib/LuaJIT/lj_parse.c +++ b/lib/LuaJIT/src/lj_parse.c diff --git a/lib/LuaJIT/lj_parse.h b/lib/LuaJIT/src/lj_parse.h index ceeab69..ceeab69 100644 --- a/lib/LuaJIT/lj_parse.h +++ b/lib/LuaJIT/src/lj_parse.h diff --git a/lib/LuaJIT/lj_profile.c b/lib/LuaJIT/src/lj_profile.c index 3223697..3223697 100644 --- a/lib/LuaJIT/lj_profile.c +++ b/lib/LuaJIT/src/lj_profile.c diff --git a/lib/LuaJIT/lj_profile.h b/lib/LuaJIT/src/lj_profile.h index 0cccfd7..0cccfd7 100644 --- a/lib/LuaJIT/lj_profile.h +++ b/lib/LuaJIT/src/lj_profile.h diff --git a/lib/LuaJIT/lj_record.c b/lib/LuaJIT/src/lj_record.c index 7f37d6c..7f37d6c 100644 --- a/lib/LuaJIT/lj_record.c +++ b/lib/LuaJIT/src/lj_record.c diff --git a/lib/LuaJIT/lj_record.h b/lib/LuaJIT/src/lj_record.h index 93d374d..93d374d 100644 --- a/lib/LuaJIT/lj_record.h +++ b/lib/LuaJIT/src/lj_record.h diff --git a/lib/LuaJIT/lj_snap.c b/lib/LuaJIT/src/lj_snap.c index 
ceaf2ca..ceaf2ca 100644 --- a/lib/LuaJIT/lj_snap.c +++ b/lib/LuaJIT/src/lj_snap.c diff --git a/lib/LuaJIT/lj_snap.h b/lib/LuaJIT/src/lj_snap.h index 2c9ae3d..2c9ae3d 100644 --- a/lib/LuaJIT/lj_snap.h +++ b/lib/LuaJIT/src/lj_snap.h diff --git a/lib/LuaJIT/lj_state.c b/lib/LuaJIT/src/lj_state.c index 632dd07..632dd07 100644 --- a/lib/LuaJIT/lj_state.c +++ b/lib/LuaJIT/src/lj_state.c diff --git a/lib/LuaJIT/lj_state.h b/lib/LuaJIT/src/lj_state.h index 02a0eaf..02a0eaf 100644 --- a/lib/LuaJIT/lj_state.h +++ b/lib/LuaJIT/src/lj_state.h diff --git a/lib/LuaJIT/lj_str.c b/lib/LuaJIT/src/lj_str.c index 264dedc..264dedc 100644 --- a/lib/LuaJIT/lj_str.c +++ b/lib/LuaJIT/src/lj_str.c diff --git a/lib/LuaJIT/lj_str.h b/lib/LuaJIT/src/lj_str.h index 85c1e40..85c1e40 100644 --- a/lib/LuaJIT/lj_str.h +++ b/lib/LuaJIT/src/lj_str.h diff --git a/lib/LuaJIT/lj_strfmt.c b/lib/LuaJIT/src/lj_strfmt.c index d7893ce..d7893ce 100644 --- a/lib/LuaJIT/lj_strfmt.c +++ b/lib/LuaJIT/src/lj_strfmt.c diff --git a/lib/LuaJIT/lj_strfmt.h b/lib/LuaJIT/src/lj_strfmt.h index 6e1d901..6e1d901 100644 --- a/lib/LuaJIT/lj_strfmt.h +++ b/lib/LuaJIT/src/lj_strfmt.h diff --git a/lib/LuaJIT/lj_strfmt_num.c b/lib/LuaJIT/src/lj_strfmt_num.c index 9271f68..9271f68 100644 --- a/lib/LuaJIT/lj_strfmt_num.c +++ b/lib/LuaJIT/src/lj_strfmt_num.c diff --git a/lib/LuaJIT/lj_strscan.c b/lib/LuaJIT/src/lj_strscan.c index f5f35c9..f5f35c9 100644 --- a/lib/LuaJIT/lj_strscan.c +++ b/lib/LuaJIT/src/lj_strscan.c diff --git a/lib/LuaJIT/lj_strscan.h b/lib/LuaJIT/src/lj_strscan.h index 6fb0dda..6fb0dda 100644 --- a/lib/LuaJIT/lj_strscan.h +++ b/lib/LuaJIT/src/lj_strscan.h diff --git a/lib/LuaJIT/lj_tab.c b/lib/LuaJIT/src/lj_tab.c index c51666d..c51666d 100644 --- a/lib/LuaJIT/lj_tab.c +++ b/lib/LuaJIT/src/lj_tab.c diff --git a/lib/LuaJIT/lj_tab.h b/lib/LuaJIT/src/lj_tab.h index 71e3494..71e3494 100644 --- a/lib/LuaJIT/lj_tab.h +++ b/lib/LuaJIT/src/lj_tab.h diff --git a/lib/LuaJIT/lj_target.h b/lib/LuaJIT/src/lj_target.h index 
8dcae95..8dcae95 100644 --- a/lib/LuaJIT/lj_target.h +++ b/lib/LuaJIT/src/lj_target.h diff --git a/lib/LuaJIT/lj_target_arm.h b/lib/LuaJIT/src/lj_target_arm.h index 5551b1f..5551b1f 100644 --- a/lib/LuaJIT/lj_target_arm.h +++ b/lib/LuaJIT/src/lj_target_arm.h diff --git a/lib/LuaJIT/lj_target_arm64.h b/lib/LuaJIT/src/lj_target_arm64.h index a207a2b..a207a2b 100644 --- a/lib/LuaJIT/lj_target_arm64.h +++ b/lib/LuaJIT/src/lj_target_arm64.h diff --git a/lib/LuaJIT/lj_target_mips.h b/lib/LuaJIT/src/lj_target_mips.h index 740687b..740687b 100644 --- a/lib/LuaJIT/lj_target_mips.h +++ b/lib/LuaJIT/src/lj_target_mips.h diff --git a/lib/LuaJIT/lj_target_ppc.h b/lib/LuaJIT/src/lj_target_ppc.h index c5c991a..c5c991a 100644 --- a/lib/LuaJIT/lj_target_ppc.h +++ b/lib/LuaJIT/src/lj_target_ppc.h diff --git a/lib/LuaJIT/lj_target_x86.h b/lib/LuaJIT/src/lj_target_x86.h index 356f792..356f792 100644 --- a/lib/LuaJIT/lj_target_x86.h +++ b/lib/LuaJIT/src/lj_target_x86.h diff --git a/lib/LuaJIT/lj_trace.c b/lib/LuaJIT/src/lj_trace.c index d85b47f..d85b47f 100644 --- a/lib/LuaJIT/lj_trace.c +++ b/lib/LuaJIT/src/lj_trace.c diff --git a/lib/LuaJIT/lj_trace.h b/lib/LuaJIT/src/lj_trace.h index 22cae74..22cae74 100644 --- a/lib/LuaJIT/lj_trace.h +++ b/lib/LuaJIT/src/lj_trace.h diff --git a/lib/LuaJIT/lj_traceerr.h b/lib/LuaJIT/src/lj_traceerr.h index 1363c4f..1363c4f 100644 --- a/lib/LuaJIT/lj_traceerr.h +++ b/lib/LuaJIT/src/lj_traceerr.h diff --git a/lib/LuaJIT/lj_udata.c b/lib/LuaJIT/src/lj_udata.c index bd0321b..bd0321b 100644 --- a/lib/LuaJIT/lj_udata.c +++ b/lib/LuaJIT/src/lj_udata.c diff --git a/lib/LuaJIT/lj_udata.h b/lib/LuaJIT/src/lj_udata.h index f271a42..f271a42 100644 --- a/lib/LuaJIT/lj_udata.h +++ b/lib/LuaJIT/src/lj_udata.h diff --git a/lib/LuaJIT/lj_vm.h b/lib/LuaJIT/src/lj_vm.h index 1cc7eed..1cc7eed 100644 --- a/lib/LuaJIT/lj_vm.h +++ b/lib/LuaJIT/src/lj_vm.h diff --git a/lib/LuaJIT/lj_vmevent.c b/lib/LuaJIT/src/lj_vmevent.c index 8664080..8664080 100644 --- 
a/lib/LuaJIT/lj_vmevent.c +++ b/lib/LuaJIT/src/lj_vmevent.c diff --git a/lib/LuaJIT/lj_vmevent.h b/lib/LuaJIT/src/lj_vmevent.h index 050fb4d..050fb4d 100644 --- a/lib/LuaJIT/lj_vmevent.h +++ b/lib/LuaJIT/src/lj_vmevent.h diff --git a/lib/LuaJIT/lj_vmmath.c b/lib/LuaJIT/src/lj_vmmath.c index b231d3e..b231d3e 100644 --- a/lib/LuaJIT/lj_vmmath.c +++ b/lib/LuaJIT/src/lj_vmmath.c diff --git a/lib/LuaJIT/ljamalg.c b/lib/LuaJIT/src/ljamalg.c index f1f2862..f1f2862 100644 --- a/lib/LuaJIT/ljamalg.c +++ b/lib/LuaJIT/src/ljamalg.c diff --git a/lib/LuaJIT/lua.h b/lib/LuaJIT/src/lua.h index 850bd79..850bd79 100644 --- a/lib/LuaJIT/lua.h +++ b/lib/LuaJIT/src/lua.h diff --git a/lib/LuaJIT/lua.hpp b/lib/LuaJIT/src/lua.hpp index 07e9002..07e9002 100644 --- a/lib/LuaJIT/lua.hpp +++ b/lib/LuaJIT/src/lua.hpp diff --git a/lib/LuaJIT/luaconf.h b/lib/LuaJIT/src/luaconf.h index c2d29d9..c2d29d9 100644 --- a/lib/LuaJIT/luaconf.h +++ b/lib/LuaJIT/src/luaconf.h diff --git a/lib/LuaJIT/luajit.c b/lib/LuaJIT/src/luajit.c index 86134ef..86134ef 100644 --- a/lib/LuaJIT/luajit.c +++ b/lib/LuaJIT/src/luajit.c diff --git a/lib/LuaJIT/luajit.h b/lib/LuaJIT/src/luajit.h index 708a5a1..708a5a1 100644 --- a/lib/LuaJIT/luajit.h +++ b/lib/LuaJIT/src/luajit.h diff --git a/lib/LuaJIT/lualib.h b/lib/LuaJIT/src/lualib.h index bfc130a..bfc130a 100644 --- a/lib/LuaJIT/lualib.h +++ b/lib/LuaJIT/src/lualib.h diff --git a/lib/LuaJIT/msvcbuild.bat b/lib/LuaJIT/src/msvcbuild.bat index 71bde75..71bde75 100644 --- a/lib/LuaJIT/msvcbuild.bat +++ b/lib/LuaJIT/src/msvcbuild.bat diff --git a/lib/LuaJIT/ps4build.bat b/lib/LuaJIT/src/ps4build.bat index e4a7def..e4a7def 100644 --- a/lib/LuaJIT/ps4build.bat +++ b/lib/LuaJIT/src/ps4build.bat diff --git a/lib/LuaJIT/psvitabuild.bat b/lib/LuaJIT/src/psvitabuild.bat index 3991dc6..3991dc6 100644 --- a/lib/LuaJIT/psvitabuild.bat +++ b/lib/LuaJIT/src/psvitabuild.bat diff --git a/lib/LuaJIT/vm_arm.dasc b/lib/LuaJIT/src/vm_arm.dasc index 780cc16..780cc16 100644 --- 
a/lib/LuaJIT/vm_arm.dasc +++ b/lib/LuaJIT/src/vm_arm.dasc diff --git a/lib/LuaJIT/vm_arm64.dasc b/lib/LuaJIT/src/vm_arm64.dasc index fb226e3..fb226e3 100644 --- a/lib/LuaJIT/vm_arm64.dasc +++ b/lib/LuaJIT/src/vm_arm64.dasc diff --git a/lib/LuaJIT/vm_mips.dasc b/lib/LuaJIT/src/vm_mips.dasc index f324812..f324812 100644 --- a/lib/LuaJIT/vm_mips.dasc +++ b/lib/LuaJIT/src/vm_mips.dasc diff --git a/lib/LuaJIT/vm_mips64.dasc b/lib/LuaJIT/src/vm_mips64.dasc index 1682c81..1682c81 100644 --- a/lib/LuaJIT/vm_mips64.dasc +++ b/lib/LuaJIT/src/vm_mips64.dasc diff --git a/lib/LuaJIT/vm_ppc.dasc b/lib/LuaJIT/src/vm_ppc.dasc index 0839668..0839668 100644 --- a/lib/LuaJIT/vm_ppc.dasc +++ b/lib/LuaJIT/src/vm_ppc.dasc diff --git a/lib/LuaJIT/vm_x64.dasc b/lib/LuaJIT/src/vm_x64.dasc index a003fb4..a003fb4 100644 --- a/lib/LuaJIT/vm_x64.dasc +++ b/lib/LuaJIT/src/vm_x64.dasc diff --git a/lib/LuaJIT/vm_x86.dasc b/lib/LuaJIT/src/vm_x86.dasc index 211ae7b..211ae7b 100644 --- a/lib/LuaJIT/vm_x86.dasc +++ b/lib/LuaJIT/src/vm_x86.dasc diff --git a/lib/LuaJIT/xb1build.bat b/lib/LuaJIT/src/xb1build.bat index 847e84a..847e84a 100644 --- a/lib/LuaJIT/xb1build.bat +++ b/lib/LuaJIT/src/xb1build.bat diff --git a/lib/LuaJIT/xedkbuild.bat b/lib/LuaJIT/src/xedkbuild.bat index 240ec87..240ec87 100644 --- a/lib/LuaJIT/xedkbuild.bat +++ b/lib/LuaJIT/src/xedkbuild.bat diff --git a/lib/libentityx.a b/lib/libentityx.a Binary files differindex a7eb45f..c1b2a0d 100644 --- a/lib/libentityx.a +++ b/lib/libentityx.a |