aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Clyne Sullivan <clyne@bitgloo.com> 2024-06-14 22:11:54 -0400
committer Clyne Sullivan <clyne@bitgloo.com> 2024-06-14 22:11:54 -0400
commit 47d7c964d075f92b9ce657f1b946f5bd6895439b (patch)
tree d463bbd15e1fdd92f9cf2f199ca489da6634f4cf
parent 6526c644f40d469f064ee91f94bb0e8f6e7c39de (diff)
initial commit
-rw-r--r-- Makefile 13
-rw-r--r-- README.md 5
-rw-r--r-- main.cpp 370
-rw-r--r-- support.c 35
-rw-r--r-- test.fp 29
5 files changed, 451 insertions, 1 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..65163ff
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+# Default target: build the forspll compiler binary.
+all: main
+
+# Build the compiler from main.cpp; compile and link flags come from llvm-config.
+main: main.cpp
+ $(CXX) -o $@ $^ `llvm-config --cxxflags --ldflags --system-libs --libs core` -std=c++20 -ggdb -O0 -g3
+
+# Compile test.fp end to end into a 32-bit x86 executable.
+# The compiler reads the Forsp source on stdin and prints the LLVM IR on
+# stderr, hence the "2>" redirection into forsp.ir.
+prog: main test.fp
+ ./main < test.fp 2> forsp.ir
+ llc -march=x86 -filetype=obj --relocation-model=pic forsp.ir -O1
+ clang -c support.c -m32 -Os
+ clang support.o forsp.ir.o -m32 -Os
+
+# Remove the compiler, the linked program (a.out) and all intermediate files.
+clean:
+ rm -f a.out main *.ir *.o
diff --git a/README.md b/README.md
index c54de9d..9e2199f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
# forspll
-Forsp LLVM-based compiler
\ No newline at end of file
+forspll is an implementation of the [Forsp](https://github.com/xorvoid/forsp) language as an LLVM-based compiler.
+
+TODO
+
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..e963340
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,370 @@
+#include <algorithm>
+#include <cctype>
+#include <charconv>
+#include <iostream>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <string_view>
+#include <system_error>
+#include <tuple>
+
+#include <llvm/ADT/APInt.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Type.h>
+#include <llvm/IR/DerivedTypes.h>
+
+// Shared LLVM state: created once in main() and used by every codegen() below.
+static std::unique_ptr<llvm::LLVMContext> llvmContext;
+static std::unique_ptr<llvm::Module> llvmModule;
+static std::unique_ptr<llvm::IRBuilder<>> llvmBuilder;
+// Global "sp" (i32): index of the next free slot on the data stack.
+static llvm::Constant *llvmSp;
+// Global "stack": the data stack, a fixed vector of 12 i32 slots.
+static llvm::Constant *llvmStack;
+
+// A named binding known to the compiler.
+struct Var {
+ llvm::Value *value; // the bound global variable or function; nullptr means "not found"
+ bool callable; // true when `value` is a function that can be called directly
+
+ Var(llvm::Value *v = nullptr, bool c = false): value(v), callable(c) {}
+};
+// Stack of lexical scopes, innermost last. The first entry is the global scope
+// created in main(); one more entry is pushed for every open thunk.
+static std::list<std::map<std::string, Var>> llvmVars;
+
+// Looks `name` up through the scope stack, innermost scope first.
+//
+// `skip` is the number of innermost scopes to ignore before the search starts.
+// Returns the binding found, or a default Var (null value) when no remaining
+// scope defines `name`.
+Var llvmVarGet(const std::string& name, int skip = 0) {
+    auto level = llvmVars.rbegin();
+    const auto outermost = llvmVars.rend();
+
+    // Step over the scopes the caller asked us to ignore.
+    for (; skip > 0 && level != outermost; --skip)
+        ++level;
+
+    for (; level != outermost; ++level) {
+        const auto hit = level->find(name);
+        if (hit != level->end())
+            return hit->second;
+    }
+
+    return {};
+}
+
+// Common base of all syntax nodes. Every node carries the text of the token it
+// was built from and knows how to emit its own LLVM IR.
+struct BaseAST
+{
+ std::string name; // token text: identifier, number literal or thunk name
+
+ BaseAST(const std::string& n): name(n) {}
+
+ virtual ~BaseAST() = default;
+
+ // Emits IR at the builder's current insert point. The base version emits
+ // nothing and returns nullptr.
+ virtual llvm::Value *codegen() const { return nullptr; }
+};
+
+// Integer literal: pushes a 32-bit constant onto the data stack.
+struct NumberAST : public BaseAST
+{
+    explicit NumberAST(const std::string& n): BaseAST(n) {}
+
+    // Emits "stack[sp] = <literal>; sp = sp + 1".
+    //
+    // Returns the store instruction, or nullptr (after reporting on stderr)
+    // when the token is not a valid 32-bit decimal integer. Nothing is emitted
+    // in that case.
+    llvm::Value *codegen() const override {
+        // Validate the literal before touching the IR: std::stoi would throw on
+        // out-of-range input and silently accept trailing junk such as "12abc".
+        int parsed = 0;
+        const char *first = name.data();
+        const char *last = first + name.size();
+        const auto [stop, ec] = std::from_chars(first, last, parsed);
+        if (ec != std::errc{} || stop != last) {
+            std::cerr << "bad number " << name << std::endl;
+            return nullptr;
+        }
+
+        auto inttype = llvm::Type::getInt32Ty(*llvmContext);
+        auto stacktype = llvm::VectorType::get(inttype, 12, false);
+        auto one = llvm::ConstantInt::get(inttype, 1);
+        auto zero = llvm::ConstantInt::get(inttype, 0);
+
+        // Post-increment: the slot written is the one sp pointed at on entry.
+        auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp);
+        auto inc = llvmBuilder->CreateAdd(dspval, one);
+        llvmBuilder->CreateStore(inc, llvmSp, false);
+
+        auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval});
+
+        auto val = llvm::ConstantInt::get(*llvmContext, llvm::APInt(32, parsed, true));
+        return llvmBuilder->CreateStore(val, dsget);
+    }
+};
+
+// "^name": pushes the value bound to `name` onto the data stack.
+struct PushAST : public BaseAST
+{
+    explicit PushAST(const std::string& n): BaseAST(n) {}
+
+    // Emits "stack[sp] = value; sp = sp + 1" and returns the store instruction.
+    // A callable binding is pushed as the function itself; any other binding is
+    // loaded from its global first. Emits nothing and returns nullptr when
+    // `name` is not bound in any scope.
+    llvm::Value *codegen() const override {
+        auto [bound, isThunk] = llvmVarGet(name);
+        if (!bound)
+            return nullptr;
+
+        auto i32 = llvm::Type::getInt32Ty(*llvmContext);
+        auto stackTy = llvm::VectorType::get(i32, 12, false);
+        auto constZero = llvm::ConstantInt::get(i32, 0);
+        auto constOne = llvm::ConstantInt::get(i32, 1);
+
+        // Post-increment the stack pointer; the old value indexes the new slot.
+        auto oldSp = llvmBuilder->CreateLoad(i32, llvmSp);
+        auto newSp = llvmBuilder->CreateAdd(oldSp, constOne);
+        llvmBuilder->CreateStore(newSp, llvmSp, false);
+
+        auto slot = llvmBuilder->CreateGEP(stackTy, llvmStack, {constZero, oldSp});
+
+        llvm::Value *pushed = bound;
+        if (!isThunk)
+            pushed = llvmBuilder->CreateLoad(i32, bound);
+        return llvmBuilder->CreateStore(pushed, slot);
+    }
+};
+
+// "$name": pops the top of the data stack into a variable called `name`.
+struct PopAST : public BaseAST
+{
+    explicit PopAST(const std::string& n): BaseAST(n) {}
+
+    // Emits "sp = sp - 1; name = stack[sp]" and binds `name` in the innermost
+    // scope. Returns the global variable that now backs `name`.
+    //
+    // Every pop creates a fresh internal global. If `name` is already bound in
+    // the current scope the binding is replaced, so later "^name" reads the
+    // value stored here rather than a stale earlier global.
+    // NOTE(review): variables are module-level globals, so re-entrant calls of
+    // the same thunk share storage — confirm this is intended.
+    llvm::Value *codegen() const override {
+        auto inttype = llvm::Type::getInt32Ty(*llvmContext);
+        auto stacktype = llvm::VectorType::get(inttype, 12, false);
+        auto one = llvm::ConstantInt::get(inttype, 1);
+        auto zero = llvm::ConstantInt::get(inttype, 0);
+
+        // Pre-decrement: the value to pop lives at sp - 1.
+        auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp);
+        auto dec = llvmBuilder->CreateSub(dspval, one);
+        auto gep = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dec});
+        llvmBuilder->CreateStore(dec, llvmSp, false);
+
+        // The module takes ownership of the new global.
+        auto var = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::InternalLinkage, zero, name);
+        llvmBuilder->CreateStore(llvmBuilder->CreateLoad(inttype, gep), var, false);
+
+        // insert_or_assign, not emplace: emplace keeps an existing entry and
+        // would leave `name` pointing at the previous global.
+        llvmVars.back().insert_or_assign(name, Var {var});
+        return var;
+    }
+};
+
+// Bare "name": invokes whatever `name` refers to.
+struct CallAST : public BaseAST
+{
+    explicit CallAST(const std::string& n): BaseAST(n) {}
+
+    // Emits a call to a "void()" function and returns the call instruction.
+    //  - unknown name: declared as an external function, recorded in the
+    //    outermost scope as callable, then called;
+    //  - callable binding: called directly;
+    //  - any other binding: its i32 value is loaded, converted to a pointer
+    //    and called indirectly.
+    llvm::Value *codegen() const override {
+        auto voidFn = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false);
+
+        const auto [target, direct] = llvmVarGet(name);
+
+        if (!target) {
+            auto decl = llvm::Function::Create(voidFn, llvm::Function::ExternalLinkage, name, llvmModule.get());
+            llvmVars.front().emplace(name, Var {decl, true});
+            return llvmBuilder->CreateCall(voidFn, decl);
+        }
+
+        if (direct)
+            return llvmBuilder->CreateCall(voidFn, target);
+
+        auto i32 = llvm::Type::getInt32Ty(*llvmContext);
+        auto raw = llvmBuilder->CreateLoad(i32, target);
+        auto fnptr = llvmBuilder->CreateIntToPtr(raw, i32->getPointerTo());
+        return llvmBuilder->CreateCall(voidFn, fnptr);
+    }
+};
+
+// A parenthesised thunk, compiled to its own "void()" LLVM function.
+//
+// Constructing a ThunkAST has side effects: it creates the function, remembers
+// the builder's current insert point and redirects the builder into the new
+// function's entry block. codegen() undoes the redirection.
+struct ThunkAST : public BaseAST
+{
+    static int tcount; // number of anonymous thunks named so far
+
+    std::list<std::unique_ptr<BaseAST>> body;
+    llvm::IRBuilderBase::InsertPoint parent; // where to resume once this thunk is closed
+    llvm::Function *func;                    // the function backing this thunk
+
+    // Anonymous thunk: named "__t<N>" with a running counter.
+    explicit ThunkAST(): ThunkAST("__t" + std::to_string(tcount++)) {}
+
+    // Named thunk: creates the function `n` and starts emitting into it.
+    explicit ThunkAST(std::string n): BaseAST(n) {
+        parent = llvmBuilder->saveIP();
+
+        auto voidTy = llvm::Type::getVoidTy(*llvmContext);
+        auto signature = llvm::FunctionType::get(voidTy, {}, false);
+        func = llvm::Function::Create(signature, llvm::Function::ExternalLinkage, name.c_str(), llvmModule.get());
+
+        auto entry = llvm::BasicBlock::Create(*llvmContext, "entry", func);
+        llvmBuilder->SetInsertPoint(entry);
+    }
+
+    // Terminates the function with "ret void", moves the builder back to the
+    // saved insert point and returns the finished function.
+    llvm::Value *codegen() const override {
+        llvmBuilder->CreateRetVoid();
+        llvmBuilder->restoreIP(parent);
+        return func;
+    }
+};
+int ThunkAST::tcount = 0;
+
+// Token kinds produced by nextToken().
+enum class Token {
+ none, // end of line, comment, or nothing recognisable
+ ThunkOpen, // '('
+ ThunkClose, // ')'
+ Quote, // '\'' followed by a name
+ PopVar, // '$' followed by a name
+ PushVar, // '^' followed by a name
+ Var, // bare name
+ Number // decimal literal, optionally starting with '-'
+};
+
+// Text of the most recently extracted name or number; overwritten by extractName().
+static std::string name;
+// Thunks that are currently open, innermost last.
+static std::list<ThunkAST> scope;
+
+// Returns true when `ch` may be part of a name: anything except whitespace
+// and ')'.
+bool isname(char ch) {
+    // <cctype> functions require a value representable as unsigned char;
+    // passing a negative plain char is undefined behaviour.
+    return !std::isspace(static_cast<unsigned char>(ch)) && ch != ')';
+}
+
+// Copies the leading run of name characters of `sv` into the global `name`
+// (replacing its previous contents) and returns the rest of the input.
+std::string_view extractName(std::string_view sv)
+{
+    const auto stop = std::find_if_not(sv.begin(), sv.end(), isname);
+    const auto length = static_cast<std::size_t>(stop - sv.begin());
+
+    name.assign(sv.substr(0, length));
+    return sv.substr(length);
+}
+
+// Reads one token from the front of `sv`.
+//
+// Returns the unread remainder of the input together with the token kind. For
+// Quote, PopVar, PushVar, Var and Number the token text is left in the global
+// `name`. A ';' starts a comment: the rest of the line is dropped and
+// Token::none is returned with an empty remainder. Empty or whitespace-only
+// input also yields Token::none with an empty remainder.
+std::pair<std::string_view, Token> nextToken(std::string_view sv)
+{
+    // <cctype> functions need unsigned char values; plain char may be negative.
+    const auto isSpace = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };
+    const auto isDigit = [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; };
+
+    // Re-check emptiness on every step: a whitespace-only line would otherwise
+    // call front() on an empty view.
+    while (!sv.empty() && isSpace(sv.front()))
+        sv.remove_prefix(1);
+
+    if (sv.empty())
+        return {sv, Token::none};
+
+    const auto ch = sv.front();
+    switch (ch) {
+    case ';':
+        return {std::string_view{}, Token::none};
+    case '(':
+        return {sv.substr(1), Token::ThunkOpen};
+    case ')':
+        return {sv.substr(1), Token::ThunkClose};
+    case '\'':
+        return {extractName(sv.substr(1)), Token::Quote};
+    case '$':
+        return {extractName(sv.substr(1)), Token::PopVar};
+    case '^':
+        return {extractName(sv.substr(1)), Token::PushVar};
+    default:
+        break;
+    }
+
+    // A '-' only starts a number when a digit follows; otherwise it is a name.
+    if (isDigit(ch) || (ch == '-' && sv.size() > 1 && isDigit(sv[1])))
+        return {extractName(sv), Token::Number};
+
+    if (isname(ch))
+        return {extractName(sv), Token::Var};
+
+    return {sv, Token::none};
+}
+
+// Debug helper: writes the token's label followed by a space to stdout.
+// Token::none prints nothing.
+void printToken(Token tok)
+{
+    const char *label = nullptr;
+
+    switch (tok) {
+    case Token::ThunkOpen:  label = "ThunkOpen ";  break;
+    case Token::ThunkClose: label = "ThunkClose "; break;
+    case Token::Quote:      label = "Quote ";      break;
+    case Token::PopVar:     label = "PopVar ";     break;
+    case Token::PushVar:    label = "PushVar ";    break;
+    case Token::Var:        label = "Var ";        break;
+    case Token::Number:     label = "Number ";     break;
+    case Token::none:       break;
+    }
+
+    if (label != nullptr)
+        std::cout << label;
+}
+
+// Tokenizes one line of Forsp source and emits IR for each token as it is read.
+//
+//  - '(' opens a thunk and a new variable scope; the outermost thunk is named
+//    "main", nested ones get generated names.
+//  - ')' finishes the innermost thunk, registers it as callable in the
+//    enclosing scope and, when another thunk is still open, pushes it onto the
+//    data stack there.
+//  - "$x", "^x", bare names and numbers become pop, push, call and literal
+//    nodes. They generate code only while a thunk is open.
+//  - Quote tokens are recognised but currently generate nothing.
+//
+// Returns true when the whole line was processed, false (after a message on
+// stderr) on an unrecognised token or a ')' with no open thunk.
+bool parseString(std::string_view sv)
+{
+    do {
+        const auto [nsv, tok] = nextToken(sv);
+
+        //printToken(tok);
+
+        if (tok == Token::none && !nsv.empty()) {
+            std::cerr << "unknown " << nsv << std::endl;
+            return false;
+        }
+
+        std::unique_ptr<BaseAST> expr;
+
+        switch (tok) {
+        case Token::ThunkOpen:
+            if (scope.empty())
+                scope.emplace_back("main");
+            else
+                scope.emplace_back();
+            llvmVars.emplace_back();
+            break;
+        case Token::ThunkClose:
+            // Without this guard a stray ')' would call back() on an empty list.
+            if (scope.empty()) {
+                std::cerr << "unmatched )" << std::endl;
+                return false;
+            }
+            {
+                auto& thunk = scope.back();
+                auto gen = thunk.codegen();
+                llvmVars.pop_back();
+                llvmVars.back().emplace(thunk.name, Var {gen, true});
+                // The node copies the name, so popping the thunk below is safe.
+                expr = std::make_unique<PushAST>(thunk.name);
+                scope.pop_back();
+            }
+            break;
+        case Token::Quote:
+            break;
+        case Token::PopVar:
+            expr = std::make_unique<PopAST>(name);
+            break;
+        case Token::PushVar:
+            expr = std::make_unique<PushAST>(name);
+            break;
+        case Token::Var:
+            expr = std::make_unique<CallAST>(name);
+            break;
+        case Token::Number:
+            expr = std::make_unique<NumberAST>(name);
+            break;
+        case Token::none:
+            break;
+        }
+
+        // Code can only be emitted inside a function, i.e. while a thunk is open.
+        if (expr && !scope.empty()) {
+            expr->codegen();
+            //scope.back().body.emplace_back().swap(expr);
+        }
+
+        sv = nsv;
+    } while (!sv.empty());
+
+    return true;
+}
+
+// Compiler driver: reads Forsp source from stdin line by line, generates LLVM
+// IR into a module named "forsp" and prints the module on stderr (the Makefile
+// captures it with "2>").
+int main()
+{
+    llvmContext = std::make_unique<llvm::LLVMContext>();
+    llvmModule = std::make_unique<llvm::Module>("forsp", *llvmContext);
+    llvmBuilder = std::make_unique<llvm::IRBuilder<>>(*llvmContext);
+
+    auto inttype = llvm::Type::getInt32Ty(*llvmContext);
+    auto stacktype = llvm::VectorType::get(inttype, 12, false);
+    auto zero = llvm::ConstantInt::get(inttype, 0);
+    // The initializer must have exactly the global's type; a one-element
+    // constant vector does not match the 12-element stack type.
+    auto zerostack = llvm::ConstantAggregateZero::get(stacktype);
+
+    // Externally visible so support.c can access "stack" and "sp".
+    // The module takes ownership of both globals.
+    llvmStack = new llvm::GlobalVariable(*llvmModule, stacktype, false, llvm::GlobalValue::ExternalLinkage, zerostack, "stack");
+    llvmSp = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::ExternalLinkage, zero, "sp");
+
+    // Outermost (global) variable scope.
+    llvmVars.emplace_back();
+
+    // Loop on the result of getline so that no extra, empty line is parsed
+    // after end of input.
+    std::string line;
+    while (std::getline(std::cin, line)) {
+        parseString(line);
+        //std::cout << std::endl;
+    }
+
+    //std::cout << "LLVM:" << std::endl;
+    llvmModule->print(llvm::errs(), nullptr);
+
+    std::cout << std::endl;
+}
+
diff --git a/support.c b/support.c
new file mode 100644
index 0000000..cc0304f
--- /dev/null
+++ b/support.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <stdint.h>
+
+/* Data stack shared with the compiled Forsp program. */
+extern int32_t sp; /* index of the next free stack slot */
+/* First slot of the stack; the functions below index past it through a pointer.
+ * NOTE(review): declared as a single int32_t although the compiler defines
+ * "stack" as a 12-element vector — confirm the declaration should not be an
+ * array. */
+extern int32_t stack;
+
+/* Pops the top of the stack and writes it to stdout as a character. */
+void emit()
+{
+    int32_t *st = &stack;
+
+    sp -= 1;
+    putchar(st[sp]);
+}
+
+/* Replaces the two topmost values a (below) and b (top) with a - b. */
+void sub()
+{
+    int32_t *top = &stack + sp; /* one past the topmost value */
+
+    top[-2] -= top[-1];
+    sp -= 1;
+}
+
+/* Conditional swap: pops a flag and, when it is non-zero, exchanges the two
+ * values that are now on top of the stack. */
+void cswap()
+{
+    int32_t *st = &stack;
+    int32_t *upper;
+    int32_t *lower;
+    int32_t held;
+
+    sp -= 1;
+    if (st[sp] == 0)
+        return;
+
+    upper = &st[sp - 1];
+    lower = &st[sp - 2];
+    held = *upper;
+    *upper = *lower;
+    *lower = held;
+}
+
+/* Replaces the two topmost values with 1 when they are equal, 0 otherwise. */
+void eq()
+{
+    int32_t *st = &stack;
+    int32_t rhs;
+    int32_t *lhs;
+
+    sp -= 1;
+    rhs = st[sp];
+    lhs = &st[sp - 1];
+    *lhs = (*lhs == rhs) ? 1 : 0;
+}
diff --git a/test.fp b/test.fp
new file mode 100644
index 0000000..015f2ee
--- /dev/null
+++ b/test.fp
@@ -0,0 +1,29 @@
+(
+ ; Test program. Everything lives inside one outer thunk, which the compiler
+ ; emits as the function "main".
+ ; Notation: $x pops into x, ^x pushes x, a bare name calls it,
+ ; ( ... ) defines a thunk and pushes it.
+
+ ; core utilities
+ ($x ^x ^x) $dup ; a -> a a
+ ($_) $drop ; a ->
+ ($x $y ^x ^y) $swap ; a b -> b a
+ ($a $b $c ^b ^a ^c) $rot
+ (sub) $- ; a b -> a-b (sub comes from support.c)
+ (0 swap - -) $+ ; a b -> a-(0-b), i.e. a+b
+ (()) $nil
+ (nil eq) $null?
+ ($x x) $force ; pops a thunk and calls it
+ (10 emit) $cr ; writes character 10 (newline)
+
+ ; recursion via y-combinator
+ ($f ($x (^x x) f) dup force) $Y ($g (^g Y)) $rec
+
+ ; if-stmt
+ ($c $t $f c ^f ^t rot cswap $_ force) $if
+ ($f $t $c $fn ^f ^t ^c fn) $endif
+
+ ; range
+ ($self $body $start $end
+ ^if (^start ^end eq) nil
+ (^start body ^end ^start 1 + ^body self)
+ endif
+ ) rec $do
+
+ ; presumably prints the characters 65..69 ("ABCDE") and a newline — TODO confirm
+ 70 65 ^emit do cr
+)