You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

514 lines
17 KiB
C++

/// sforth, an implementation of forth
/// Copyright (C) 2024 Clyne Sullivan <clyne@bitgloo.com>
///
/// This program is free software: you can redistribute it and/or modify it
/// under the terms of the GNU General Public License as published by the Free
/// Software Foundation, either version 3 of the License, or (at your option)
/// any later version.
///
/// This program is distributed in the hope that it will be useful, but WITHOUT
/// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
/// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
/// more details.
///
/// You should have received a copy of the GNU General Public License along
/// with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef SFORTH_HPP
#define SFORTH_HPP
#include <algorithm>
#include <array>
#include <bit>
#include <charconv>
#include <cstdint>
#include <cstddef>
#include <iterator>
#include <span>
#include <string_view>
#include <tuple>
#include <utility>
using cell = std::intptr_t;
using addr = std::uintptr_t;
using func = void (*)(const void *);
struct word_base;
struct word_list
{
const word_base *next;
constexpr word_list(const word_base *n = nullptr): next{n} {}
std::optional<const word_base *> get(std::string_view sv) const;
static constexpr auto parse(const char *source, std::size_t& sourcei) -> std::string_view {
const std::string_view sv {source};
const auto e = sv.find_first_of(" \t\r\n", sourcei);
const auto word = e != std::string_view::npos ?
sv.substr(sourcei, e - sourcei) : sv.substr(sourcei);
sourcei = sv.find_first_not_of(" \t\r\n", e);
return word;
}
};
struct word_base : public word_list
{
static constexpr addr immediate = 1 << 8;
addr flags_len;
constexpr word_base(const word_base *n, addr fl):
word_list{n}, flags_len{fl} {}
std::string_view name() const {
return {std::bit_cast<const char *>(this + 1)};
}
const func *body() const {
const auto ptr = std::bit_cast<const std::uint8_t *>(this + 1);
const auto fptr = ptr + (flags_len & 0xFF);
return std::bit_cast<const func *>(fptr);
}
constexpr void make_immediate() {
flags_len |= immediate;
}
};
template<unsigned N>
struct cS {
char data[N];
consteval cS(const char (&s)[N]) {
std::copy(s, s + N, data);
}
consteval operator const char *() const {
return data;
}
consteval auto size() const {
return N;
}
};
struct forth : public word_list
{
static constexpr bool enable_exceptions = true;
static constexpr int data_size = 16;
static constexpr int return_size = 16;
static constexpr auto npos = std::string_view::npos;
enum class error {
init_error,
parse_error,
execute_error,
dictionary_overflow,
word_not_found,
stack_underflow,
stack_overflow,
return_stack_underflow,
return_stack_overflow,
compile_only_word
};
template<error Err>
static inline void assert(bool condition) {
if constexpr (enable_exceptions) {
if (!condition)
throw Err;
}
}
void push(cell v) {
assert<error::stack_overflow>(sp != dstack.begin());
*--sp = v;
}
void push(cell v, auto... vs) {
push(v); (push(vs), ...);
}
void rpush(func *v) {
assert<error::return_stack_overflow>(rp != rstack.begin());
*--rp = v;
}
cell& top() {
assert<error::stack_underflow>(sp != dstack.end());
return *sp;
}
cell pop() {
assert<error::stack_underflow>(sp != dstack.end());
return *sp++;
}
auto rpop() -> func * {
assert<error::return_stack_underflow>(rp != rstack.end());
return *rp++;
}
template<int N>
auto pop() {
static_assert(N > 0, "pop<N>() with N <= 0");
auto t = std::tuple {pop()};
if constexpr (N > 1)
return std::tuple_cat(t, pop<N - 1>());
else
return t;
}
forth& add(std::string_view name, func entry = nullptr) {
const auto namesz = (name.size() + 1 + sizeof(cell) - 1) & ~(sizeof(cell) - 1);
const auto size = (sizeof(word_base) + namesz) / sizeof(cell);
assert<error::parse_error>(!name.empty());
//assert<error::dictionary_overflow>(state->here + size < &dictionary.back());
const auto h = std::exchange(here, here + size);
next = new (h) word_base (next, namesz);
std::copy(name.begin(), name.end(),
std::bit_cast<char *>(h) + sizeof(word_base));
if (entry)
*here++ = std::bit_cast<cell>(entry);
return *this;
}
auto parse() -> std::string_view {
return word_list::parse(source, sourcei);
}
void parse_line(std::string_view sv) {
source = sv.data();
sourcei = sv.find_first_not_of(" \t\r\n");
while (sourcei != npos) {
const auto word = parse();
if (auto ent = get(word); !ent) {
cell n;
const auto [p, e] = std::from_chars(word.cbegin(), word.cend(),
n, base);
assert<error::word_not_found>(e == std::errc() && p == word.cend());
push(n);
if (compiling)
execute((*get("literal"))->body());
} else {
auto body = (*ent)->body();
if (compiling && ((*ent)->flags_len & word_base::immediate) == 0) {
*here++ = std::bit_cast<cell>(body);
} else {
execute(body);
}
}
}
}
void execute(const func *body) {
assert<error::execute_error>(body && *body);
(*body)(body);
}
template<forth **fthp>
static void prologue(const void *bodyf) {
static auto& fth = **fthp;
auto body = (func *)bodyf;
fth.rpush(fth.ip);
for (fth.ip = body + 1; *fth.ip; fth.ip++)
fth.execute(std::bit_cast<func *>(*fth.ip));
fth.ip = fth.rpop();
}
template<forth** fthp>
static void initialize(cell *end_value);
static auto error_string(error err) noexcept -> std::string_view {
using enum error;
switch (err) {
case init_error: return "init error";
case parse_error: return "parse error";
case execute_error: return "execute error";
case dictionary_overflow: return "dictionary overflow";
case word_not_found: return "word not found";
case stack_underflow: return "stack underflow";
case stack_overflow: return "stack overflow";
case return_stack_underflow: return "return stack underflow";
case return_stack_overflow: return "return stack overflow";
case compile_only_word: return "compile only word";
default: return "unknown error";
}
}
constexpr forth() {
sp = dstack.end();
rp = rstack.end();
}
cell *sp;
func **rp;
func *ip = nullptr;
cell *here = std::bit_cast<cell *>(this + 1);
const char *source = nullptr;
std::size_t sourcei = npos;
cell compiling = false;
cell *end = nullptr;
cell base = 10;
std::array<cell, data_size> dstack;
std::array<func *, return_size> rstack;
};
template<cS Name, func Body, auto *Prev = (const word_base *)nullptr>
struct native_word : public word_base
{
constexpr static auto N = (sizeof(Name) + sizeof(cell) - 1) & ~(sizeof(cell) - 1);
std::array<char, N> namebuf;
func body;
consteval const func *get_ct(std::string_view name) const {
if (name == std::string_view{Name.data})
return &body;
else if constexpr (Prev != nullptr)
return Prev->get_ct(name);
else
return nullptr;
}
consteval native_word(addr flags = 0):
word_base{Prev, N | flags}, namebuf{}, body{Body}
{
std::copy(Name.data, Name.data + sizeof(Name), namebuf.data());
}
};
template<const func Prol, cS Name, cS Body, auto *Prev = (const word_base *)nullptr>
struct comp_word : public native_word<Name, Prol, Prev>
{
static constexpr std::size_t B =
[] {
std::size_t b = 1;
std::string_view sv {Body.data};
auto sourcei = sv.find_first_not_of(" \t\r\n");
while (sourcei != std::string_view::npos) {
const auto word = word_list::parse(Body.data, sourcei);
b++;
if (!Prev->get_ct(word))
b++;
}
return b;
}();
union bodyt {
const func *f;
cell c;
};
std::array<bodyt, B> bodybuf {};
consteval comp_word(addr flags = 0):
native_word<Name, Prol, Prev>{flags}
{
auto bptr = bodybuf.begin();
std::string_view sv {Body};
auto sourcei = sv.find_first_not_of(" \t\r\n");
while (sourcei != std::string_view::npos) {
const auto word = word_list::parse(Body, sourcei);
auto w = Prev->get_ct(word);
if (w) {
bptr->f = Prev->get_ct(word);
bptr++;
} else {
cell n;
std::from_chars(word.cbegin(), word.cend(), n, 10);
bptr->f = Prev->get_ct("_lit");
bptr++;
bptr->c = n;
bptr++;
}
}
}
};
template<cS Name, func Body, addr Flags, auto... Next>
struct native_dict
{
constexpr static native_word<Name, Body,
[] {
if constexpr (sizeof...(Next))
return &native_dict<Next...>::word;
else
return (const word_base *)nullptr;
}()> word {Flags};
};
template<func Prol, auto *Prev, cS Name, cS Body, addr Flags, auto... Next>
struct comp_dict
{
constexpr static comp_word<Prol, Name, Body,
[] {
if constexpr (sizeof...(Next))
return &comp_dict<Prol, Prev, Next...>::word;
else
return Prev;
}()> word {Flags};
};
std::optional<const word_base *> word_list::get(std::string_view sv) const
{
for (auto lt = next; lt; lt = lt->next) {
if (sv == lt->name())
return lt;
}
return {};
}
template<forth** fthp>
void forth::initialize(cell *end_value)
{
assert<error::init_error>(*fthp);
static auto& fth = **fthp;
static auto _d = std::bit_cast<cell>(*fthp);
constexpr static func lit_impl = [](auto) {
auto ptr = std::bit_cast<cell *>(++fth.ip);
fth.push(*ptr);
};
constexpr static auto& dict1 = native_dict<
cS{"_d" }, [](auto) { fth.push(_d); }, 0
, cS{"sp" }, [](auto) { fth.push(_d + sizeof(cell)); }, 0
, cS{"rp" }, [](auto) { fth.push(_d + 2 * sizeof(cell)); }, 0
, cS{"ip" }, [](auto) { fth.push(_d + 3 * sizeof(cell)); }, 0
, cS{"dp" }, [](auto) { fth.push(_d + 4 * sizeof(cell)); }, 0
, cS{"state"}, [](auto) { fth.push(_d + 7 * sizeof(cell)); }, 0
, cS{"base" }, [](auto) { fth.push(_d + 9 * sizeof(cell)); }, 0
, cS{"_lit" }, lit_impl, 0
, cS{"swap" }, [](auto) { auto [a, b] = fth.pop<2>(); fth.push(a, b); }, 0
, cS{"drop" }, [](auto) { fth.pop(); }, 0
, cS{"dup" }, [](auto) { fth.push(fth.top()); }, 0
, cS{"rot" }, [](auto) { auto [a, b, c] = fth.pop<3>(); fth.push(b, a, c); }, 0
, cS{"+" }, [](auto) { fth.top() += fth.pop(); }, 0
, cS{"-" }, [](auto) { fth.top() -= fth.pop(); }, 0
, cS{"*" }, [](auto) { fth.top() *= fth.pop(); }, 0
, cS{"/" }, [](auto) { fth.top() /= fth.pop(); }, 0
, cS{"mod" }, [](auto) { fth.top() %= fth.pop(); }, 0
, cS{"and" }, [](auto) { fth.top() &= fth.pop(); }, 0
, cS{"or" }, [](auto) { fth.top() |= fth.pop(); }, 0
, cS{"xor" }, [](auto) { fth.top() ^= fth.pop(); }, 0
, cS{"lshift"}, [](auto) { fth.top() <<= fth.pop(); }, 0
, cS{"rshift"}, [](auto) { fth.top() >>= fth.pop(); }, 0
, cS{"[" }, [](auto) { fth.compiling = false; }, word_base::immediate
, cS{"]" }, [](auto) { fth.compiling = true; }, 0
, cS{"immediate"}, [](auto) { const_cast<word_base *>(fth.next)->make_immediate(); }, 0
, cS{"literal"}, [](auto) {
//assert<error::compile_only_word>(fth.compiling);
*fth.here++ = std::bit_cast<cell>(&lit_impl);
*fth.here++ = fth.pop(); }, word_base::immediate
, cS{"@" }, [](auto) { fth.push(*std::bit_cast<cell *>(fth.pop())); }, 0
, cS{"!" }, [](auto) { auto [p, v] = fth.pop<2>(); *std::bit_cast<cell *>(p) = v; }, 0
, cS{"c@" }, [](auto) { fth.push(*std::bit_cast<char *>(fth.pop())); }, 0
, cS{"c!" }, [](auto) { auto [p, v] = fth.pop<2>(); *std::bit_cast<char *>(p) = v; }, 0
, cS{"=" }, [](auto) { auto v = fth.pop(); fth.top() = -(fth.top() == v); }, 0
, cS{"<" }, [](auto) { auto v = fth.pop(); fth.top() = -(fth.top() < v); }, 0
, cS{"\'" }, [](auto) {
auto w = fth.parse();
auto g = fth.get(w);
fth.push(g ? std::bit_cast<cell>((*g)->body()) : 0); }, 0
, cS{":" }, [](auto) {
const auto prologue = forth::prologue<fthp>;
auto w = fth.parse();
fth.add(w);
*fth.here++ = std::bit_cast<cell>(prologue);
fth.compiling = true; }, 0
, cS{";" }, [](auto) { *fth.here++ = 0; fth.compiling = false; }, word_base::immediate
, cS{"\\" }, [](auto) { fth.sourcei = npos; }, word_base::immediate
, cS{"cell" }, [](auto) { fth.push(sizeof(cell)); }, 0
, cS{"_jmp" }, [](auto) {
auto ptr = ++fth.ip;
fth.ip = *std::bit_cast<func **>(ptr) - 1; }, 0
, cS{"_jmp0"}, [](auto) {
auto ptr = ++fth.ip;
if (fth.pop() == 0)
fth.ip = *std::bit_cast<func **>(ptr) - 1; }, 0
, cS{"chars"}, [](auto) {}, 0
, cS{"postpone"}, [](auto) {
assert<error::compile_only_word>(fth.compiling);
auto w = fth.parse();
auto g = fth.get(w);
assert<error::word_not_found>(g.has_value());
*fth.here++ = std::bit_cast<cell>((*g)->body()); }, word_base::immediate
>::word;
constexpr static auto& dict2 = comp_dict<forth::prologue<fthp>, &dict1
, cS{"align" }, cS{"here dup aligned swap - allot"}, 0
, cS{"aligned"}, cS{"cell 1- + cell 1- invert and"}, 0
, cS{"decimal"}, cS{"10 base !"}, 0
, cS{"hex" }, cS{"16 base !"}, 0
, cS{"<=" }, cS{"2dup < >r = r> or"}, 0
, cS{"2!" }, cS{"swap over ! cell+ !"}, 0
, cS{"2@" }, cS{"dup cell+ @ swap @"}, 0
, cS{"c," }, cS{"here c! 1 allot"}, 0
, cS{"," }, cS{"here ! cell allot"}, 0
, cS{"allot" }, cS{"dp +!"}, 0
, cS{"+!" }, cS{"dup >r swap r> @ + swap !"}, 0
, cS{"2swap" }, cS{"rot >r rot r>"}, 0
, cS{"2dup" }, cS{"over over"}, 0
, cS{"2over" }, cS{"3 pick 3 pick"}, 0
, cS{">r" }, cS{"rp@ cell - rp ! rp@ cell+ @ rp@ ! rp@ cell+ !"}, 0
, cS{"r>" }, cS{"rp@ @ rp@ cell+ rp ! rp@ @ swap rp@ !"}, 0
, cS{"over" }, cS{"1 pick"}, 0
, cS{"pick" }, cS{"cells cell+ sp@ + @"}, 0
, cS{"sp@" }, cS{"sp @"}, 0
, cS{"rp@" }, cS{"rp @ cell+"}, 0
, cS{"here" }, cS{"dp @"}, 0
, cS{"latest"}, cS{"_d @"}, 0
, cS{"1-" }, cS{"1 -" }, 0
, cS{"1+" }, cS{"1 +" }, 0
, cS{"cell+" }, cS{"cell +"}, 0
, cS{"cells" }, cS{"cell *"}, 0
, cS{"char+" }, cS{"1 +" }, 0
, cS{"-rot" }, cS{"rot rot"}, 0
, cS{"2drop" }, cS{"drop drop"}, 0
, cS{"0=" }, cS{"0 ="}, 0
, cS{"0<" }, cS{"0 <"}, 0
, cS{"<>" }, cS{"= 0="}, 0
, cS{">" }, cS{"swap <"}, 0
, cS{"invert"}, cS{"-1 xor"}, 0
, cS{"negate"}, cS{"-1 *"}, 0
, cS{"2*" }, cS{"2 *"}, 0
, cS{"bl" }, cS{"32"}, 0
>::word;
fth.next = &dict2;
fth.end = end_value;
}
//static_assert(offsetof(word_base, flags_len) == 1 * sizeof(cell));
//static_assert(offsetof(forth, sp) == 1 * sizeof(cell));
//static_assert(offsetof(forth, rp) == 2 * sizeof(cell));
//static_assert(offsetof(forth, ip) == 3 * sizeof(cell));
//static_assert(offsetof(forth, here) == 4 * sizeof(cell));
//static_assert(offsetof(forth, source) == 5 * sizeof(cell));
//static_assert(offsetof(forth, sourcei) == 6 * sizeof(cell));
//static_assert(offsetof(forth, compiling) == 7 * sizeof(cell));
//static_assert(offsetof(forth, end) == 8 * sizeof(cell));
//static_assert(offsetof(forth, base) == 9 * sizeof(cell));
#endif // SFORTH_HPP