From 9b90d6fe67e80a782c69791a8e3ef916cb6a4499 Mon Sep 17 00:00:00 2001 From: Clyne Sullivan Date: Fri, 29 Nov 2024 21:54:29 -0500 Subject: [PATCH] compile time parsing --- core.fth | 13 +- forth.hpp | 345 ++++++++++++++++++++++++++++++++++++------------------ main.cpp | 4 +- 3 files changed, 237 insertions(+), 125 deletions(-) diff --git a/core.fth b/core.fth index 96dc0f2..c3746c3 100644 --- a/core.fth +++ b/core.fth @@ -1,23 +1,20 @@ -: cell+ cell + ; -: cells cell * ; -: char+ 1 + ; : chars ; : state [ _d 7 cells + ] literal ; \ : [ 0 state ! ; immediate \ : ] -1 state ! ; -: sp _d ; -: rp [ _d cell+ ] literal ; -: dp [ _d 3 cells + ] literal ; +: sp [ _d cell+ ] literal ; +: rp [ _d 2 cells + ] literal ; +: dp [ _d 4 cells + ] literal ; : sp@ sp @ ; : rp@ rp @ cell+ ; -: ip [ _d cell+ cell+ ] literal ; +: ip [ _d 3 cells + ] literal ; : here dp @ ; : unused [ _d 8 cells + ] literal @ here - ; : base [ _d 9 cells + ] literal ; -: latest [ _d 4 cells + ] literal @ ; +: latest _d @ ; \ : dup sp@ @ ; \ : drop sp@ cell+ sp ! ; diff --git a/forth.hpp b/forth.hpp index 48ebbc0..14c7fb0 100644 --- a/forth.hpp +++ b/forth.hpp @@ -29,12 +29,159 @@ #include #include -struct forth +using cell = std::intptr_t; +using addr = std::uintptr_t; +using func = void (*)(const void *); + +struct word_base; + +struct word_list { - using cell = std::intptr_t; - using addr = std::uintptr_t; - using func = void (*)(const void *); + const word_base *next; + + constexpr word_list(const word_base *n = nullptr): next{n} {} + + std::optional get(std::string_view sv) const; + + static constexpr auto parse(const char *source, std::size_t& sourcei) -> std::string_view { + const std::string_view sv {source}; + const auto e = sv.find_first_of(" \t\r\n", sourcei); + const auto word = e != std::string_view::npos ? + sv.substr(sourcei, e - sourcei) : sv.substr(sourcei); + + sourcei = sv.find_first_not_of(" \t\r\n", e); + return word; + } +}; + +struct word_base : public word_list +{ + static constexpr addr immediate = 1 << 8; + + addr flags_len; + + constexpr word_base(const word_base *n, addr fl): + word_list{n}, flags_len{fl} {} + + std::string_view name() const { + return {std::bit_cast(this + 1)}; + } + + const func *body() const { + const auto ptr = std::bit_cast(this + 1); + const auto fptr = ptr + (flags_len & 0xFF); + return std::bit_cast(fptr); + } + + constexpr void make_immediate() { + flags_len |= immediate; + } +}; + +template +struct ctstring { + char data[N]; + + consteval ctstring(const char (&s)[N]) { + std::copy(s, s + N, data); + } + consteval operator const char *() const { + return data; + } + consteval auto size() const { + return N; + } +}; + +template +struct comp_word : public word_base +{ + constexpr static auto N = (sizeof(Name) + sizeof(cell) - 1) & ~(sizeof(cell) - 1); + static constexpr std::size_t B = + [] { + std::size_t b = 1; + std::string_view sv {Body.data}; + auto sourcei = sv.find_first_not_of(" \t\r\n"); + while (sourcei != std::string_view::npos) { + const auto word = parse(Body.data, sourcei); + + b++; + if (!Prev.get_ct(word)) + b++; + } + return b; + }(); + + union bodyt { + const func *f; + cell c; + }; + + std::array namebuf; + const func prologue; + std::array bodybuf; + + consteval const func *get_ct(std::string_view name) const { + if (name == std::string_view{Name.data}) + return &prologue; + else + return Prev.get_ct(name); + } + + consteval comp_word(const func prol, addr flags = 0): + word_base{&Prev, N | flags}, namebuf{}, prologue{prol}, bodybuf{} + { + std::copy(Name.data, Name.data + sizeof(Name), namebuf.data()); + + auto bptr = bodybuf.begin(); + std::string_view sv {Body}; + auto sourcei = sv.find_first_not_of(" \t\r\n"); + while (sourcei != std::string_view::npos) { + const auto word = parse(Body, sourcei); + + auto w = get_ct(word); + if (w) { + bptr->f = get_ct(word); + bptr++; + } else { + cell n; + std::from_chars(word.cbegin(), word.cend(), n, 10); + + bptr->f = get_ct("_lit"); + bptr++; + bptr->c = n; + bptr++; + } + } + } +}; + +template +struct native_word : public word_base +{ + constexpr static auto N = (sizeof(Name) + sizeof(cell) - 1) & ~(sizeof(cell) - 1); + std::array namebuf; + func body; + + consteval const func *get_ct(std::string_view name) const { + if (name == std::string_view{Name.data}) + return &body; + else if constexpr (Prev != nullptr) + return Prev->get_ct(name); + else + return nullptr; + } + + consteval native_word(func bod, addr flags = 0): + word_base{Prev, N | flags}, namebuf{}, body{bod} + { + std::copy(Name.data, Name.data + sizeof(Name), namebuf.data()); + } +}; + +struct forth : public word_list +{ static constexpr bool enable_exceptions = true; static constexpr int data_size = 16; static constexpr int return_size = 16; @@ -62,47 +209,6 @@ struct forth } } - struct word_base { - static constexpr addr immediate = 1 << 8; - - const word_base *next; - addr flags_len; - - auto name() const -> std::string_view { - return {std::bit_cast(this + 1)}; - } - - auto body() const -> const func * { - const auto ptr = std::bit_cast(this + 1); - const auto fptr = ptr + (flags_len & 0xFF); - return std::bit_cast(fptr); - } - - constexpr void make_immediate() { - flags_len |= immediate; - } - }; - - template - struct word : public word_base { - std::array name; - func body; - - template - consteval word(const char (&nam)[N], - func bod = nullptr, - const word_base *prev = nullptr, - addr flags = 0): - word_base{prev, L | flags}, name{}, body{bod} - { - std::copy(nam, nam + N, name.begin()); - } - }; - - template - word(const char (&nam)[N], func b = nullptr, const word_base *w = nullptr, - addr flags = 0) -> word<(N + sizeof(cell)) & ~(sizeof(cell) - 1)>; - void push(cell v) { assert(sp != dstack.begin()); *--sp = v; @@ -151,7 +257,7 @@ struct forth //assert(state->here + size < &dictionary.back()); const auto h = std::exchange(here, here + size); - latest = new (h) word_base (latest, namesz); + next = new (h) word_base (next, namesz); std::copy(name.begin(), name.end(), std::bit_cast(h) + sizeof(word_base)); if (entry) @@ -159,6 +265,10 @@ struct forth return *this; } + auto parse() -> std::string_view { + return word_list::parse(source, sourcei); + } + void parse_line(std::string_view sv) { source = sv.data(); sourcei = sv.find_first_not_of(" \t\r\n"); @@ -189,35 +299,16 @@ struct forth } } - auto parse() -> std::string_view { - const std::string_view sv {source}; - - const auto e = sv.find_first_of(" \t\r\n", sourcei); - const auto word = e != npos ? sv.substr(sourcei, e - sourcei) - : sv.substr(sourcei); - - sourcei = sv.find_first_not_of(" \t\r\n", e); - return word; - } - void execute(const func *body) { assert(body && *body); (*body)(body); } - auto get(std::string_view sv) -> std::optional { - for (auto lt = latest; lt; lt = lt->next) { - if (sv == lt->name()) - return lt; - } - - return {}; - } - template - static void prologue(func *body) { + static void prologue(const void *bodyf) { static auto& fth = **fthp; + auto body = (func *)bodyf; fth.rpush(fth.ip); for (fth.ip = body + 1; *fth.ip; fth.ip++) @@ -251,7 +342,7 @@ struct forth auto f_lbrac = [](auto) { fth.compiling = false; }; auto f_rbrac = [](auto) { fth.compiling = true; }; auto f_imm = [](auto) { - const_cast(fth.latest)->make_immediate(); }; + const_cast(fth.next)->make_immediate(); }; auto f_lit = [](auto) { //assert(fth.compiling); *fth.here++ = std::bit_cast(&lit_impl); @@ -307,42 +398,57 @@ struct forth *fth.here++ = std::bit_cast((*g)->body()); }; - constexpr static word w_dict {"_d", f_dict}; - constexpr static word w_liti {"_lit", lit_impl, &w_dict}; - constexpr static word w_add {"+", f_add, &w_liti}; - constexpr static word w_minus {"-", f_minus, &w_add}; - constexpr static word w_times {"*", f_times, &w_minus}; - constexpr static word w_divide {"/", f_divide, &w_times}; - constexpr static word w_mod {"mod", f_mod, &w_divide}; - constexpr static word w_bitand {"and", f_bitand, &w_mod}; - constexpr static word w_bitor {"or", f_bitor, &w_bitand}; - constexpr static word w_bitxor {"xor", f_bitxor, &w_bitor}; - constexpr static word w_lshift {"lshift", f_lshift, &w_bitxor}; - constexpr static word w_rshift {"rshift", f_rshift, &w_lshift}; - constexpr static word w_lbrac {"[", f_lbrac, &w_rshift, word_base::immediate}; - constexpr static word w_rbrac {"]", f_rbrac, &w_lbrac}; - constexpr static word w_imm {"immediate", f_imm, &w_rbrac}; - constexpr static word w_lit {"literal", f_lit, &w_imm, word_base::immediate}; - constexpr static word w_peek {"@", f_peek, &w_lit}; - constexpr static word w_poke {"!", f_poke, &w_peek}; - constexpr static word w_cpeek {"c@", f_cpeek, &w_poke}; - constexpr static word w_cpoke {"c!", f_cpoke, &w_cpeek}; - constexpr static word w_swap {"swap", f_swap, &w_cpoke}; - constexpr static word w_drop {"drop", f_drop, &w_swap}; - constexpr static word w_dup {"dup", f_dup, &w_drop}; - constexpr static word w_rot {"rot", f_rot, &w_dup}; - constexpr static word w_eq {"=", f_eq, &w_rot}; - constexpr static word w_lt {"<", f_lt, &w_eq}; - constexpr static word w_tick {"\'", f_tick, &w_lt}; - constexpr static word w_colon {":", f_colon, &w_tick}; - constexpr static word w_semic {";", f_semic, &w_colon, word_base::immediate}; - constexpr static word w_comm {"\\", f_comm, &w_semic, word_base::immediate}; - constexpr static word w_cell {"cell", f_cell, &w_comm}; - constexpr static word w_jmp {"_jmp", f_jmp, &w_cell}; - constexpr static word w_jmp0 {"_jmp0", f_jmp0, &w_jmp}; - constexpr static word w_postp {"postpone", f_postpone, &w_jmp0, word_base::immediate}; - - fth.latest = &w_postp; + constexpr static native_word<"_d"> w_dict {f_dict}; + constexpr static native_word<"_lit", &w_dict> w_liti {lit_impl}; + constexpr static native_word<"swap", &w_liti> w_swap {f_swap}; + constexpr static native_word<"drop", &w_swap> w_drop {f_drop}; + constexpr static native_word<"dup", &w_drop> w_dup {f_dup}; + constexpr static native_word<"rot", &w_dup> w_rot {f_rot}; + constexpr static native_word<"+", &w_rot> w_add {f_add}; + constexpr static native_word<"-", &w_add> w_minus {f_minus}; + constexpr static native_word<"*", &w_minus> w_times {f_times}; + constexpr static native_word<"/", &w_times> w_divid {f_divide}; + constexpr static native_word<"mod", &w_divid> w_mod {f_mod}; + constexpr static native_word<"and", &w_mod> w_and {f_bitand}; + constexpr static native_word<"or", &w_and> w_or {f_bitor}; + constexpr static native_word<"xor", &w_or> w_xor {f_bitxor}; + constexpr static native_word<"lshift", &w_xor> w_lsh {f_lshift}; + constexpr static native_word<"rshift", &w_lsh> w_rsh {f_rshift}; + constexpr static native_word<"[", &w_rsh> w_lbrac {f_lbrac, + word_base::immediate}; + constexpr static native_word<"]", &w_lbrac> w_rbrac {f_rbrac}; + constexpr static native_word<"immediate", &w_rbrac> w_imm {f_imm}; + constexpr static native_word<"literal", &w_imm> w_lit {f_lit, + word_base::immediate}; + constexpr static native_word<"@", &w_lit> w_peek {f_peek}; + constexpr static native_word<"!", &w_peek> w_poke {f_poke}; + constexpr static native_word<"c@", &w_poke> w_cpeek {f_cpeek}; + constexpr static native_word<"c!", &w_cpeek> w_cpoke {f_cpoke}; + constexpr static native_word<"=", &w_cpoke> w_eq {f_eq}; + constexpr static native_word<"<", &w_eq> w_lt {f_lt}; + constexpr static native_word<"\'", &w_lt> w_tick {f_tick}; + constexpr static native_word<":", &w_tick> w_colon {f_colon}; + constexpr static native_word<";", &w_colon> w_semic {f_semic, + word_base::immediate}; + constexpr static native_word<"\\", &w_semic> w_comm {f_comm, + word_base::immediate}; + constexpr static native_word<"cell", &w_comm> w_cell {f_cell}; + constexpr static native_word<"_jmp", &w_cell> w_jmp {f_jmp}; + constexpr static native_word<"_jmp0", &w_jmp> w_jmp0 {f_jmp0}; + constexpr static native_word<"postpone", &w_jmp0> w_postp {f_postpone, + word_base::immediate}; + constexpr static comp_word<"cell+", "cell +", w_postp> w_cellp + {forth::prologue}; + constexpr static comp_word<"cells", "cell *", w_cellp> w_cells + {forth::prologue}; + constexpr static comp_word<"char+", "1 +", w_cells> w_charp + {forth::prologue}; + constexpr static comp_word<"1+", "1 +", w_charp> w_inc + {forth::prologue}; + constexpr static comp_word<"1-", "1 -", w_inc> w_dec + {forth::prologue}; + + fth.next = &w_dec; fth.end = end_value; } @@ -372,7 +478,6 @@ struct forth func **rp; func *ip = nullptr; cell *here = std::bit_cast(this + 1); - const word_base *latest = nullptr; const char *source = nullptr; std::size_t sourcei = npos; cell compiling = false; @@ -382,16 +487,26 @@ struct forth std::array rstack; }; -static_assert(offsetof(forth::word_base, flags_len) == 1 * sizeof(forth::cell)); -static_assert(offsetof(forth, rp) == 1 * sizeof(forth::cell)); -static_assert(offsetof(forth, ip) == 2 * sizeof(forth::cell)); -static_assert(offsetof(forth, here) == 3 * sizeof(forth::cell)); -static_assert(offsetof(forth, latest) == 4 * sizeof(forth::cell)); -static_assert(offsetof(forth, source) == 5 * sizeof(forth::cell)); -static_assert(offsetof(forth, sourcei) == 6 * sizeof(forth::cell)); -static_assert(offsetof(forth, compiling) == 7 * sizeof(forth::cell)); -static_assert(offsetof(forth, end) == 8 * sizeof(forth::cell)); -static_assert(offsetof(forth, base) == 9 * sizeof(forth::cell)); +std::optional word_list::get(std::string_view sv) const +{ + for (auto lt = next; lt; lt = lt->next) { + if (sv == lt->name()) + return lt; + } + + return {}; +} + +//static_assert(offsetof(word_base, flags_len) == 1 * sizeof(cell)); +//static_assert(offsetof(forth, sp) == 1 * sizeof(cell)); +//static_assert(offsetof(forth, rp) == 2 * sizeof(cell)); +//static_assert(offsetof(forth, ip) == 3 * sizeof(cell)); +//static_assert(offsetof(forth, here) == 4 * sizeof(cell)); +//static_assert(offsetof(forth, source) == 5 * sizeof(cell)); +//static_assert(offsetof(forth, sourcei) == 6 * sizeof(cell)); +//static_assert(offsetof(forth, compiling) == 7 * sizeof(cell)); +//static_assert(offsetof(forth, end) == 8 * sizeof(cell)); +//static_assert(offsetof(forth, base) == 9 * sizeof(cell)); #endif // SFORTH_HPP diff --git a/main.cpp b/main.cpp index d3bbe42..c3267ba 100644 --- a/main.cpp +++ b/main.cpp @@ -22,7 +22,7 @@ #include #include -static std::array dict; +static std::array dict; static auto fth = new (dict.data()) forth; static bool parse_stream(forth *, std::istream&, bool say_okay = false); @@ -38,7 +38,7 @@ int main(int argc, const char *argv[]) std::cout << buf << ' '; }); fth->add("emit", [](auto) { std::cout << static_cast(fth->pop()); }); - fth->add("dictsize", [](auto) { fth->push(dict.size() * sizeof(forth::cell)); }); + fth->add("dictsize", [](auto) { fth->push(dict.size() * sizeof(cell)); }); for (auto arg : args) { if (std::ifstream file {arg}; parse_stream(fth, file))