diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c43a6db --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +a.out +main +*.ir +*.o +*.sw* diff --git a/Makefile b/Makefile index 65163ff..c7761c0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,11 @@ +CXXFILES := main.cpp parser.cpp llvm.cpp ast.cpp var.cpp +CXXFLAGS := `llvm-config --cxxflags` -std=c++20 -ggdb -O0 -g3 +LDFLAGS := `llvm-config --ldflags --system-libs --libs core` + all: main -main: main.cpp - $(CXX) -o $@ $^ `llvm-config --cxxflags --ldflags --system-libs --libs core` -std=c++20 -ggdb -O0 -g3 +main: $(subst .cpp,.o,$(CXXFILES)) + $(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS) prog: main test.fp ./main < test.fp 2> forsp.ir diff --git a/README.md b/README.md index 9e2199f..11c76e9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,23 @@ # forspll -forspll is an implementation of the [Forsp](https://github.com/xorvoid/forsp) language as an LLVM-based compiler. +forspll is an implementation of [Forsp](https://xorvoid.com/forsp.html) as an LLVM-based compiler. Forsp is a tiny yet very versatile programming language that mixes features of Forth and Lisp. Through LLVM, Forsp can be compiled into efficient machine code for a wide array of platforms. -TODO +forspll features: + +* Lisp-style S-expression syntax +* Forth-style data stack for parameters/values +* Linking with C functions (see `support.c`) + +Missing features: + +* Quote operator: `quote`/`'` +* Lists and atoms or any dynamic allocations + +## Building + +Requires Clang and LLVM development files. + +Run `make` to build the compiler. + +Run `make prog` to compile `test.fp` and `support.c`. 
diff --git a/ast.cpp b/ast.cpp
new file mode 100644
index 0000000..07bc507
--- /dev/null
+++ b/ast.cpp
@@ -0,0 +1,95 @@
+#include "ast.hpp"
+
+// Counter used to name anonymous thunks "__t0", "__t1", ...
+int ThunkAST::tcount = 0;
+
+NumberAST::NumberAST(const std::string& n): BaseAST(n) {}
+
+// Emits a push of the integer literal spelled by `name`.
+// std::stoi throws if the text does not parse as an int.
+llvm::Value *NumberAST::codegen(LLVMState& llvmState) const
+{
+    auto val = llvmState.createInt(std::stoi(name));
+    return llvmState.builder.CreateStore(val, llvmState.createPush());
+}
+
+PushAST::PushAST(const std::string& n): BaseAST(n) {}
+
+// Emits a push of the value bound to `name`. A non-callable binding is
+// loaded as an i32 first; a callable one is stored as-is. Returns nullptr,
+// emitting nothing, when `name` is not bound in any scope.
+llvm::Value *PushAST::codegen(LLVMState& llvmState) const
+{
+    if (auto [var, thunk] = Var::lookup(name); var) {
+        auto dsget = llvmState.createPush();
+        if (!thunk)
+            var = llvmState.builder.CreateLoad(llvmState.inttype, var);
+
+        return llvmState.builder.CreateStore(var, dsget);
+    } else {
+        return nullptr;
+    }
+}
+
+PopAST::PopAST(const std::string& n): BaseAST(n) {}
+
+// Emits a pop into a fresh internal global named `name` and registers that
+// global under `name` in the innermost scope.
+llvm::Value *PopAST::codegen(LLVMState& llvmState) const
+{
+    auto gep = llvmState.createPop();
+    auto var = llvmState.createVariable(name);
+    auto load = llvmState.builder.CreateLoad(llvmState.inttype, gep);
+    llvmState.builder.CreateStore(load, var, false);
+
+    return Var::addLocal(name, var).value;
+}
+
+CallAST::CallAST(const std::string& n): BaseAST(n) {}
+
+// Emits a call to `name`: a direct call for a callable binding, an indirect
+// call through the loaded i32 for a non-callable binding, or a call to a
+// newly declared external function (registered in the outermost scope) when
+// the name is unbound.
+llvm::Value *CallAST::codegen(LLVMState& llvmState) const
+{
+    if (auto [var, call] = Var::lookup(name); var) {
+        if (call) {
+            return llvmState.builder.CreateCall(llvmState.ftype, var);
+        } else {
+            auto val = llvmState.builder.CreateLoad(llvmState.inttype, var);
+            auto cast = llvmState.builder.CreateIntToPtr(val,
+                llvmState.inttype->getPointerTo());
+            return llvmState.builder.CreateCall(llvmState.ftype, cast);
+        }
+    } else {
+        auto func = llvmState.createFunction(name);
+        Var::addGlobal(name, Var {func, true});
+        return llvmState.builder.CreateCall(llvmState.ftype, func);
+    }
+}
+
+// Anonymous thunk: named "__t<N>" from the running counter.
+ThunkAST::ThunkAST(LLVMState& llvmState):
+    ThunkAST(llvmState, std::string("__t") + std::to_string(tcount++)) {}
+
+// Creates the thunk's function and moves the builder into its entry block;
+// the previous insertion point is saved for codegen() to restore.
+ThunkAST::ThunkAST(LLVMState& llvmState, std::string n): BaseAST(n)
+{
+    parent = llvmState.builder.saveIP();
+    func = llvmState.createFunction(name);
+    auto BB = llvmState.createEntry(func);
+    llvmState.builder.SetInsertPoint(BB);
+}
+
+// Terminates the thunk with `ret void`, restores the builder to the saved
+// insertion point and returns the thunk's function.
+llvm::Value *ThunkAST::codegen(LLVMState& llvmState) const
+{
+    llvmState.builder.CreateRetVoid();
+    llvmState.builder.restoreIP(parent);
+
+    return func;
+}
+
diff --git a/ast.hpp b/ast.hpp
new file mode 100644
index 0000000..217e221
--- /dev/null
+++ b/ast.hpp
@@ -0,0 +1,66 @@
+#ifndef FORSPLL_AST_HPP
+#define FORSPLL_AST_HPP
+
+#include "llvm.hpp"
+#include "var.hpp"
+
+#include <list>
+#include <memory>
+#include <string>
+
+// Base of every AST node: holds the token text and emits IR for it.
+struct BaseAST
+{
+    std::string name;
+
+    BaseAST(const std::string& n): name(n) {}
+
+    virtual ~BaseAST() = default;
+    virtual llvm::Value *codegen(LLVMState&) const = 0;
+};
+
+struct NumberAST : public BaseAST
+{
+    // push number onto stack
+    explicit NumberAST(const std::string& n);
+    llvm::Value *codegen(LLVMState& llvmState) const override;
+};
+
+struct PushAST : public BaseAST
+{
+    // push named value to stack
+    explicit PushAST(const std::string& n);
+    llvm::Value *codegen(LLVMState& llvmState) const override;
+};
+
+struct PopAST : public BaseAST
+{
+    // pop value on stack to named var
+    explicit PopAST(const std::string& n);
+    llvm::Value *codegen(LLVMState& llvmState) const override;
+};
+
+struct CallAST : public BaseAST
+{
+    // invoke named invocable
+    explicit CallAST(const std::string& n);
+    llvm::Value *codegen(LLVMState& llvmState) const override;
+};
+
+// A parenthesised thunk compiled to its own function. The constructor
+// opens the function; codegen() closes it and returns it.
+struct ThunkAST : public BaseAST
+{
+    static int tcount;
+
+    std::list<std::unique_ptr<BaseAST>> body;
+    llvm::IRBuilderBase::InsertPoint parent;
+    llvm::Function *func;
+
+    explicit ThunkAST(LLVMState& llvmState);
+    explicit ThunkAST(LLVMState& llvmState, std::string n);
+    llvm::Value *codegen(LLVMState& llvmState) const override;
+};
+
+#endif // FORSPLL_AST_HPP
+
diff --git a/llvm.cpp b/llvm.cpp
new file mode 100644
index 0000000..fc56d9e
--- /dev/null
+++ b/llvm.cpp
@@ -0,0 +1,77 @@
+#include "llvm.hpp"
+
+// Creates the "forsp" module and the two globals every thunk shares: the
+// i32 stack index "sp" and the 12-element i32 vector "stack".
+LLVMState::LLVMState():
+    ctx(),
+    modul("forsp", ctx),
+    builder(ctx),
+    inttype(llvm::Type::getInt32Ty(ctx)),
+    stacktype(llvm::VectorType::get(inttype, 12, false)),
+    ftype(llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), {}, false)),
+    one(llvm::ConstantInt::get(inttype, 1)),
+    zero(llvm::ConstantInt::get(inttype, 0))
+{
+    // The initializer must have exactly the global's value type; a
+    // one-element vector constant does not match the 12-element stack type.
+    auto zerovec = llvm::Constant::getNullValue(stacktype);
+    llvmSp = new llvm::GlobalVariable(modul, inttype, false,
+        llvm::GlobalValue::ExternalLinkage, zero, "sp");
+    llvmStack = new llvm::GlobalVariable(modul, stacktype, false,
+        llvm::GlobalValue::ExternalLinkage, zerovec, "stack");
+}
+
+// Emits `sp = sp + 1` and returns the address of the slot at the old sp,
+// i.e. the slot the caller should store into. No bounds check is emitted.
+llvm::Value *LLVMState::createPush()
+{
+    auto dspval = builder.CreateLoad(inttype, llvmSp);
+    auto inc = builder.CreateAdd(dspval, one);
+    builder.CreateStore(inc, llvmSp, false);
+
+    return builder.CreateGEP(stacktype, llvmStack, {zero, dspval});
+}
+
+// Emits `sp = sp - 1` and returns the address of the slot at the new sp,
+// i.e. the slot holding the popped value. No bounds check is emitted.
+llvm::Value *LLVMState::createPop()
+{
+    auto dspval = builder.CreateLoad(inttype, llvmSp);
+    auto dec = builder.CreateSub(dspval, one);
+    builder.CreateStore(dec, llvmSp, false);
+
+    return builder.CreateGEP(stacktype, llvmStack, {zero, dec});
+}
+
+// Declares an externally linked `void()` function called `name`.
+llvm::Function *LLVMState::createFunction(const std::string& name)
+{
+    return llvm::Function::Create(ftype, llvm::Function::ExternalLinkage,
+        name.c_str(), modul);
+}
+
+// Appends a basic block labelled "entry" to `func`.
+llvm::BasicBlock *LLVMState::createEntry(llvm::Function *func)
+{
+    return llvm::BasicBlock::Create(ctx, "entry", func);
+}
+
+// Creates a zero-initialised, internally linked i32 global called `name`.
+llvm::Value *LLVMState::createVariable(const std::string& name)
+{
+    return new llvm::GlobalVariable(modul, inttype, false,
+        llvm::GlobalValue::InternalLinkage, zero, name);
+}
+
+// Returns `n` as a signed 32-bit constant.
+llvm::Constant *LLVMState::createInt(int n)
+{
+    return llvm::ConstantInt::get(ctx, llvm::APInt(32, n, true));
+}
+
+// Prints the module's textual IR to llvm::errs().
+void LLVMState::output()
+{
+    //std::cout << "LLVM:" << std::endl;
+    modul.print(llvm::errs(), nullptr);
+}
diff --git a/llvm.hpp b/llvm.hpp
new file mode 100644
index 0000000..008d5cd
--- /dev/null
+++ b/llvm.hpp
@@ -0,0 +1,41 @@
+#ifndef FORSPLL_LLVM_HPP
+#define FORSPLL_LLVM_HPP
+
+#include 
+#include +#include +#include +#include +#include +#include + +#include + +struct LLVMState /* Owns the LLVM context, module and IR builder together with the types, constants and globals shared by all code generation. */ +{ + llvm::LLVMContext ctx; + llvm::Module modul; + llvm::IRBuilder<> builder; + + llvm::Type *inttype; /* i32: the type of one stack cell and of every variable */ + llvm::Type *stacktype; /* 12-element vector of inttype */ + llvm::FunctionType *ftype; /* void(): signature used for every created and called function */ + llvm::Constant *one; + llvm::Constant *zero; + llvm::Constant *llvmSp; /* global named sp: index of the next free stack slot */ + llvm::Constant *llvmStack; /* global named stack: storage for the data stack */ + + LLVMState(); + + llvm::Value *createPush(); + llvm::Value *createPop(); + llvm::Function *createFunction(const std::string& name); + llvm::BasicBlock *createEntry(llvm::Function *func); + llvm::Value *createVariable(const std::string& name); + llvm::Constant *createInt(int n); + + void output(); /* prints the module IR to llvm::errs() */ +}; + +#endif // FORSPLL_LLVM_HPP + diff --git a/main.cpp b/main.cpp index e963340..50c0266 100644 --- a/main.cpp +++ b/main.cpp @@ -3,281 +3,34 @@ #include #include #include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include "ast.hpp" +#include "llvm.hpp" +#include "parser.hpp" +#include "var.hpp" -static std::unique_ptr llvmContext; -static std::unique_ptr llvmModule; -static std::unique_ptr> llvmBuilder; -static llvm::Constant *llvmSp; -static llvm::Constant *llvmStack; - -struct Var { - llvm::Value *value; - bool callable; - - Var(llvm::Value *v = nullptr, bool c = false): value(v), callable(c) {} -}; -static std::list> llvmVars; - -Var llvmVarGet(const std::string& name, int skip = 0) { - for (auto sc = llvmVars.rbegin(); sc != llvmVars.rend(); ++sc) { - if (skip > 0) { - --skip; - continue; - } - if (sc->contains(name)) - return (*sc)[name]; - } - - return {}; -} - -struct BaseAST -{ - std::string name; - - BaseAST(const std::string& n): name(n) {} - - virtual ~BaseAST() = default; - - virtual llvm::Value *codegen() const { return nullptr; } -}; - -struct NumberAST : public BaseAST -{ - // push number onto stack - - explicit NumberAST(const std::string& n): BaseAST(n) {} - - llvm::Value *codegen() const override { - auto inttype = 
llvm::Type::getInt32Ty(*llvmContext); - auto stacktype = llvm::VectorType::get(inttype, 12, false); - auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp); - auto one = llvm::ConstantInt::get(inttype, 1); - auto zero = llvm::ConstantInt::get(inttype, 0); - auto inc = llvmBuilder->CreateAdd(dspval, one); - llvmBuilder->CreateStore(inc, llvmSp, false); - - auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval}); - - auto val = llvm::ConstantInt::get(*llvmContext, llvm::APInt(32, std::stoi(name), true)); - return llvmBuilder->CreateStore(val, dsget); - } -}; - -struct PushAST : public BaseAST -{ - // push named value to stack - - explicit PushAST(const std::string& n): BaseAST(n) {} - - llvm::Value *codegen() const override { - if (auto [var, thunk] = llvmVarGet(name); var) { - auto inttype = llvm::Type::getInt32Ty(*llvmContext); - auto stacktype = llvm::VectorType::get(inttype, 12, false); - auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp); - auto one = llvm::ConstantInt::get(inttype, 1); - auto zero = llvm::ConstantInt::get(inttype, 0); - auto inc = llvmBuilder->CreateAdd(dspval, one); - llvmBuilder->CreateStore(inc, llvmSp, false); - - auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval}); - - if (!thunk) - var = llvmBuilder->CreateLoad(inttype, var); - return llvmBuilder->CreateStore(var, dsget); - } else { - return nullptr; - } - } -}; - -struct PopAST : public BaseAST -{ - // pop value on stack to named var - - explicit PopAST(const std::string& n): BaseAST(n) {} - - llvm::Value *codegen() const override { - auto inttype = llvm::Type::getInt32Ty(*llvmContext); - auto stacktype = llvm::VectorType::get(inttype, 12, false); - auto one = llvm::ConstantInt::get(inttype, 1); - auto zero = llvm::ConstantInt::get(inttype, 0); - auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp); - auto dec = llvmBuilder->CreateSub(dspval, one); - auto gep = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dec}); - 
llvmBuilder->CreateStore(dec, llvmSp, false); - - auto var = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::InternalLinkage, zero, name); - llvmBuilder->CreateStore(llvmBuilder->CreateLoad(inttype, gep), var, false); - - auto [it, _] = llvmVars.back().emplace(name, var); - return it->second.value; - } -}; - -struct CallAST : public BaseAST -{ - // invoke named invocable - - explicit CallAST(const std::string& n): BaseAST(n) {} - - llvm::Value *codegen() const override { - auto ftype = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false); - - if (auto [var, call] = llvmVarGet(name); var) { - if (call) { - return llvmBuilder->CreateCall(ftype, var); - } else { - auto inttype = llvm::Type::getInt32Ty(*llvmContext); - auto val = llvmBuilder->CreateLoad(inttype, var); - auto cast = llvmBuilder->CreateIntToPtr(val, inttype->getPointerTo()); - return llvmBuilder->CreateCall(ftype, cast); - } - } else { - auto func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, name, llvmModule.get()); - llvmVars.front().emplace(name, Var {func, true}); - return llvmBuilder->CreateCall(ftype, func); - } - } -}; - -struct ThunkAST : public BaseAST -{ - static int tcount; - - std::list> body; - llvm::IRBuilderBase::InsertPoint parent; - llvm::Function *func; - - explicit ThunkAST(): ThunkAST(std::string("__t") + std::to_string(tcount++)) {} - - explicit ThunkAST(std::string n): BaseAST(n) { - parent = llvmBuilder->saveIP(); - auto ftype = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false); - func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, name.c_str(), llvmModule.get()); - auto BB = llvm::BasicBlock::Create(*llvmContext, "entry", func); - llvmBuilder->SetInsertPoint(BB); - } - - llvm::Value *codegen() const override { - llvmBuilder->CreateRetVoid(); - llvmBuilder->restoreIP(parent); - - return func; - } -}; -int ThunkAST::tcount = 0; - -enum class Token { - none, - ThunkOpen, - 
ThunkClose, - Quote, - PopVar, - PushVar, - Var, - Number -}; - -static std::string name; +static LLVMState llvmState; static std::list scope; -bool isname(char ch) { - return !isspace(ch) && ch != ')'; -} - -std::string_view extractName(std::string_view sv) -{ - name.clear(); - - while (!sv.empty()) { - const auto ch = sv.front(); +static bool parseString(std::string_view sv); - if (isname(ch)) { - name += ch; - sv.remove_prefix(1); - } else { - break; - } - } - - return sv; -} - -std::pair nextToken(std::string_view sv) +int main() /* Reads source text from stdin one line at a time, hands each line to parseString(), then emits the generated module through llvmState.output(). */ { - if (sv.empty()) - return {sv, Token::none}; - - while (std::isspace(sv.front())) - sv.remove_prefix(1); + Var::pushScope(); - if (sv.empty()) - return {sv, Token::none}; - - const auto ch = sv.front(); - if (ch == ';') { - return {{}, Token::none}; - } else if (ch == '(') { - return {sv.substr(1), Token::ThunkOpen}; - } else if (ch == ')') { - return {sv.substr(1), Token::ThunkClose}; - } else if (ch == '\'') { - return {extractName(sv.substr(1)), Token::Quote}; - } else if (ch == '$') { - return {extractName(sv.substr(1)), Token::PopVar}; - } else if (ch == '^') { - return {extractName(sv.substr(1)), Token::PushVar}; - } else if (isdigit(ch) || (ch == '-' && sv.size() > 1 && isdigit(sv[1]))) { - return {extractName(sv), Token::Number}; - } else if (isname(ch)) { - return {extractName(sv), Token::Var}; + std::string line; + while (std::cin.good()) { + std::getline(std::cin, line); + parseString(line); + //std::cout << std::endl; } - return {sv, Token::none}; -} + llvmState.output(); -void printToken(Token tok) -{ - switch (tok) { - case Token::ThunkOpen: - std::cout << "ThunkOpen "; - break; - case Token::ThunkClose: - std::cout << "ThunkClose "; - break; - case Token::Quote: - std::cout << "Quote "; - break; - case Token::PopVar: - std::cout << "PopVar "; - break; - case 
Token::PushVar: - std::cout << "PushVar "; - break; - case Token::Var: - std::cout << "Var "; - break; - case Token::Number: - std::cout << "Number "; - break; - 
case Token::none: - //std::cout << "none "; - break; - } + std::cout << std::endl; } bool parseString(std::string_view sv) @@ -296,17 +49,17 @@ bool parseString(std::string_view sv) switch (tok) { case Token::ThunkOpen: if (scope.empty()) - scope.emplace_back("main"); + scope.emplace_back(llvmState, "main"); else - scope.emplace_back(); - llvmVars.emplace_back(); + scope.emplace_back(llvmState); + Var::pushScope(); break; case Token::ThunkClose: { auto& thunk = scope.back(); - auto gen = thunk.codegen(); - llvmVars.pop_back(); - llvmVars.back().emplace(thunk.name, Var {gen, true}); + auto gen = thunk.codegen(llvmState); + Var::popScope(); + Var::addLocal(thunk.name, Var {gen, true}); expr.reset(new PushAST {thunk.name}); scope.pop_back(); } @@ -330,7 +83,7 @@ bool parseString(std::string_view sv) } if (expr && !scope.empty()) { - expr->codegen(); + expr->codegen(llvmState); //scope.back().body.emplace_back().swap(expr); } } @@ -341,30 +94,3 @@ bool parseString(std::string_view sv) return true; } -int main() -{ - llvmContext = std::make_unique(); - llvmModule = std::make_unique("forsp", *llvmContext); - llvmBuilder = std::make_unique>(*llvmContext); - auto inttype = llvm::Type::getInt32Ty(*llvmContext); - auto stacktype = llvm::VectorType::get(inttype, 12, false); - auto zero = llvm::ConstantInt::get(inttype, 0); - auto zerovec = llvm::ConstantVector::get(llvm::ArrayRef(dynamic_cast(zero))); - llvmStack = new llvm::GlobalVariable(*llvmModule, stacktype, false, llvm::GlobalValue::ExternalLinkage, zerovec, "stack"); - llvmSp = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::ExternalLinkage, zero, "sp"); - - llvmVars.emplace_back(); - - std::string line; - while (std::cin.good()) { - std::getline(std::cin, line); - parseString(line); - //std::cout << std::endl; - } - - //std::cout << "LLVM:" << std::endl; - llvmModule->print(llvm::errs(), nullptr); - - std::cout << std::endl; -} - diff --git a/parser.cpp b/parser.cpp new file mode 100644 index 
0000000..f80721a
--- /dev/null
+++ b/parser.cpp
@@ -0,0 +1,113 @@
+#include "parser.hpp"
+
+#include <cctype>
+#include <iostream>
+
+// Text of the most recently extracted name or number token. extractName()
+// overwrites it; parser.hpp exports it to the rest of the program.
+std::string name;
+
+// The <cctype> classifiers are undefined for negative char values, so every
+// character is converted to unsigned char before being classified.
+static bool isspaceChar(char ch) {
+    return std::isspace(static_cast<unsigned char>(ch)) != 0;
+}
+
+static bool isdigitChar(char ch) {
+    return std::isdigit(static_cast<unsigned char>(ch)) != 0;
+}
+
+// A name character is anything except whitespace and ')'.
+static bool isname(char ch) {
+    return !isspaceChar(ch) && ch != ')';
+}
+
+// Copies the leading run of name characters of `sv` into the global `name`
+// and returns the unconsumed remainder.
+static std::string_view extractName(std::string_view sv)
+{
+    name.clear();
+
+    while (!sv.empty()) {
+        const auto ch = sv.front();
+
+        if (isname(ch)) {
+            name += ch;
+            sv.remove_prefix(1);
+        } else {
+            break;
+        }
+    }
+
+    return sv;
+}
+
+// Scans one token from the front of `sv` and returns the remaining input
+// together with the token kind. Blank input and a ';' comment (which
+// swallows the rest of the line) yield Token::none with an empty remainder.
+// For Quote, PopVar, PushVar, Number and Var the token text is left in
+// `name`, without the leading sigil for the first three.
+std::pair<std::string_view, Token> nextToken(std::string_view sv)
+{
+    // Skip leading whitespace. The emptiness test keeps front() from being
+    // called on an exhausted view when the input is nothing but whitespace.
+    while (!sv.empty() && isspaceChar(sv.front()))
+        sv.remove_prefix(1);
+
+    if (sv.empty())
+        return {sv, Token::none};
+
+    const auto ch = sv.front();
+    if (ch == ';') {
+        return {{}, Token::none};
+    } else if (ch == '(') {
+        return {sv.substr(1), Token::ThunkOpen};
+    } else if (ch == ')') {
+        return {sv.substr(1), Token::ThunkClose};
+    } else if (ch == '\'') {
+        return {extractName(sv.substr(1)), Token::Quote};
+    } else if (ch == '$') {
+        return {extractName(sv.substr(1)), Token::PopVar};
+    } else if (ch == '^') {
+        return {extractName(sv.substr(1)), Token::PushVar};
+    } else if (isdigitChar(ch) || (ch == '-' && sv.size() > 1 && isdigitChar(sv[1]))) {
+        return {extractName(sv), Token::Number};
+    } else if (isname(ch)) {
+        return {extractName(sv), Token::Var};
+    }
+
+    return {sv, Token::none};
+}
+
+// Debugging aid: writes the token kind and a trailing space to std::cout.
+// Token::none writes nothing.
+void printToken(Token tok)
+{
+    switch (tok) {
+    case Token::ThunkOpen:
+        std::cout << "ThunkOpen ";
+        break;
+    case Token::ThunkClose:
+        std::cout << "ThunkClose ";
+        break;
+    case Token::Quote:
+        std::cout << "Quote ";
+        break;
+    case Token::PopVar:
+        std::cout << "PopVar ";
+        break;
+    case Token::PushVar:
+        std::cout << "PushVar ";
+        break;
+    case Token::Var:
+        std::cout << "Var ";
+        break;
+    case Token::Number:
+        std::cout << "Number ";
+        break;
+    case Token::none:
+        //std::cout << "none ";
+        break;
+    }
+}
+
diff --git a/parser.hpp b/parser.hpp
new file mode 100644
index 0000000..962c984
--- /dev/null
+++ b/parser.hpp
@@ -0,0 +1,25 @@
+#ifndef 
FORSPLL_PARSER_HPP
+#define FORSPLL_PARSER_HPP
+
+#include <string>
+#include <string_view>
+#include <utility>
+
+enum class Token {  // token kinds produced by nextToken()
+    none,           // nothing scanned: blank input or a ';' comment
+    ThunkOpen,      // '('
+    ThunkClose,     // ')'
+    Quote,          // '\'' followed by a name
+    PopVar,         // '$' followed by a name
+    PushVar,        // '^' followed by a name
+    Var,            // bare name
+    Number          // integer literal, optionally starting with '-'
+};
+
+extern std::string name;  // text of the last name/number token scanned
+
+std::pair<std::string_view, Token> nextToken(std::string_view sv);
+void printToken(Token tok);
+
+#endif // FORSPLL_PARSER_HPP
+
diff --git a/var.cpp b/var.cpp
new file mode 100644
index 0000000..339054f
--- /dev/null
+++ b/var.cpp
@@ -0,0 +1,56 @@
+#include "var.hpp"
+
+#include <list>
+#include <map>
+#include <string>
+
+// Scope stack: front() is the outermost (global) scope, back() the innermost.
+// NOTE(review): the mapped container is taken to be std::map<std::string, Var>;
+// std::unordered_map would behave the same here. Confirm against the original.
+static std::list<std::map<std::string, Var>> llvmVars;
+
+// Finds `name`, searching from the innermost scope outwards after skipping
+// the `skip` innermost scopes. Returns a default Var (null value) when the
+// name is not bound.
+Var Var::lookup(const std::string& name, int skip)
+{
+    for (auto sc = llvmVars.rbegin(); sc != llvmVars.rend(); ++sc) {
+        if (skip > 0) {
+            --skip;
+            continue;
+        }
+        if (auto it = sc->find(name); it != sc->end())
+            return it->second;
+    }
+
+    return {};
+}
+
+// Opens a new innermost scope.
+void Var::pushScope()
+{
+    llvmVars.emplace_back();
+}
+
+// Discards the innermost scope. A scope must exist.
+void Var::popScope()
+{
+    llvmVars.pop_back();
+}
+
+// Binds `name` in the outermost scope, replacing any binding it already has
+// there, and returns the stored entry. A scope must exist.
+Var& Var::addGlobal(const std::string& name, Var var)
+{
+    return llvmVars.front().insert_or_assign(name, var).first->second;
+}
+
+// Binds `name` in the innermost scope and returns the stored entry. An
+// existing binding is replaced: emplace() would keep the old entry, so a
+// name popped twice in one scope would keep resolving to its first variable.
+// A scope must exist.
+Var& Var::addLocal(const std::string& name, Var var)
+{
+    return llvmVars.back().insert_or_assign(name, var).first->second;
+}
+
diff --git a/var.hpp b/var.hpp
new file mode 100644
index 0000000..098af69
--- /dev/null
+++ b/var.hpp
@@ -0,0 +1,25 @@
+#ifndef FORSPLL_VAR_HPP
+#define FORSPLL_VAR_HPP
+
+// NOTE(review): llvm::Value is assumed to come from <llvm/IR/Value.h>; confirm.
+#include <llvm/IR/Value.h>
+#include <string>
+
+// A named binding: the LLVM value it refers to and whether that value can be
+// called directly. The static members manage the program-wide scope stack.
+struct Var {
+    llvm::Value *value;
+    bool callable;
+
+    Var(llvm::Value *v = nullptr, bool c = false):
+        value(v), callable(c) {}
+
+    static Var lookup(const std::string& name, int skip = 0);
+    static void pushScope();
+    static void popScope();
+    static Var& addGlobal(const std::string& name, Var var);
+    static Var& addLocal(const std::string& name, Var var);
+};
+
+#endif // FORSPLL_VAR_HPP
+