diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..65163ff --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +all: main + +main: main.cpp + $(CXX) -o $@ $^ `llvm-config --cxxflags --ldflags --system-libs --libs core` -std=c++20 -ggdb -O0 -g3 + +prog: main test.fp + ./main < test.fp 2> forsp.ir + llc -march=x86 -filetype=obj --relocation-model=pic forsp.ir -O1 + clang -c support.c -m32 -Os + clang support.o forsp.ir.o -m32 -Os + +clean: + rm -f a.out main *.ir *.o diff --git a/README.md b/README.md index c54de9d..9e2199f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ # forspll -Forsp LLVM-based compiler \ No newline at end of file +forspll is an implementation of the [Forsp](https://github.com/xorvoid/forsp) language as an LLVM-based compiler. + +TODO + diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..e963340 --- /dev/null +++ b/main.cpp @@ -0,0 +1,370 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static std::unique_ptr llvmContext; +static std::unique_ptr llvmModule; +static std::unique_ptr> llvmBuilder; +static llvm::Constant *llvmSp; /* global "sp": i32 index of the next free stack slot */ +static llvm::Constant *llvmStack; /* global "stack": the 12-element i32 data stack */ + +struct Var { /* A named binding: the IR value plus whether it is called/pushed directly (true) or loaded as an i32 first (false). */ + llvm::Value *value; + bool callable; + + Var(llvm::Value *v = nullptr, bool c = false): value(v), callable(c) {} +}; +static std::list> llvmVars; /* scope stack: front() is the outermost scope, back() the innermost */ + +Var llvmVarGet(const std::string& name, int skip = 0) { /* Looks `name` up from the innermost scope outwards, ignoring the first `skip` scopes; returns a default Var (null value) when unbound. */ + for (auto sc = llvmVars.rbegin(); sc != llvmVars.rend(); ++sc) { + if (skip > 0) { + --skip; + continue; + } + if (sc->contains(name)) + return (*sc)[name]; + } + + return {}; +} + +struct BaseAST /* Common base of all nodes: holds the token text in `name`; the default codegen() emits nothing and returns nullptr. */ +{ + std::string name; + + BaseAST(const std::string& n): name(n) {} + + virtual ~BaseAST() = default; + + virtual llvm::Value *codegen() const { return nullptr; } +}; + +struct NumberAST : public BaseAST +{ + // push number onto stack: parses `name` with std::stoi, stores it at stack[sp] and increments sp + + explicit NumberAST(const std::string& n): BaseAST(n) {} + + llvm::Value
*codegen() const override { + auto inttype = llvm::Type::getInt32Ty(*llvmContext); + auto stacktype = llvm::VectorType::get(inttype, 12, false); + auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp); + auto one = llvm::ConstantInt::get(inttype, 1); + auto zero = llvm::ConstantInt::get(inttype, 0); + auto inc = llvmBuilder->CreateAdd(dspval, one); + llvmBuilder->CreateStore(inc, llvmSp, false); + + auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval}); + + auto val = llvm::ConstantInt::get(*llvmContext, llvm::APInt(32, std::stoi(name), true)); + return llvmBuilder->CreateStore(val, dsget); + } +}; + +struct PushAST : public BaseAST +{ + // push named value to stack: callable bindings are stored as-is, others are loaded as i32 first; emits nothing and returns nullptr when the name is unbound + + explicit PushAST(const std::string& n): BaseAST(n) {} + + llvm::Value *codegen() const override { + if (auto [var, thunk] = llvmVarGet(name); var) { + auto inttype = llvm::Type::getInt32Ty(*llvmContext); + auto stacktype = llvm::VectorType::get(inttype, 12, false); + auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp); + auto one = llvm::ConstantInt::get(inttype, 1); + auto zero = llvm::ConstantInt::get(inttype, 0); + auto inc = llvmBuilder->CreateAdd(dspval, one); + llvmBuilder->CreateStore(inc, llvmSp, false); + + auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval}); + + if (!thunk) + var = llvmBuilder->CreateLoad(inttype, var); + return llvmBuilder->CreateStore(var, dsget); + } else { + return nullptr; + } + } +}; + +struct PopAST : public BaseAST +{ + // pop value on stack to named var: decrements sp, copies stack[sp] into a fresh internal i32 global and binds `name` to it in the innermost scope + + explicit PopAST(const std::string& n): BaseAST(n) {} + + llvm::Value *codegen() const override { + auto inttype = llvm::Type::getInt32Ty(*llvmContext); + auto stacktype = llvm::VectorType::get(inttype, 12, false); + auto one = llvm::ConstantInt::get(inttype, 1); + auto zero = llvm::ConstantInt::get(inttype, 0); + auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp); + auto dec = llvmBuilder->CreateSub(dspval, one); + auto gep = llvmBuilder->CreateGEP(stacktype,
llvmStack, {zero, dec}); + llvmBuilder->CreateStore(dec, llvmSp, false); + + auto var = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::InternalLinkage, zero, name); + llvmBuilder->CreateStore(llvmBuilder->CreateLoad(inttype, gep), var, false); + + auto [it, _] = llvmVars.back().insert_or_assign(name, Var {var}); /* replace any earlier binding of this name in the same scope; emplace would silently keep the stale one */ + return it->second.value; + } +}; + +struct CallAST : public BaseAST +{ + // invoke named invocable: callable bindings are called directly, plain bindings are loaded and cast to a pointer first, unbound names are declared as external void() functions in the outermost scope + + explicit CallAST(const std::string& n): BaseAST(n) {} + + llvm::Value *codegen() const override { + auto ftype = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false); + + if (auto [var, call] = llvmVarGet(name); var) { + if (call) { + return llvmBuilder->CreateCall(ftype, var); + } else { + auto inttype = llvm::Type::getInt32Ty(*llvmContext); + auto val = llvmBuilder->CreateLoad(inttype, var); + auto cast = llvmBuilder->CreateIntToPtr(val, inttype->getPointerTo()); + return llvmBuilder->CreateCall(ftype, cast); + } + } else { + auto func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, name, llvmModule.get()); + llvmVars.front().emplace(name, Var {func, true}); + return llvmBuilder->CreateCall(ftype, func); + } + } +}; + +struct ThunkAST : public BaseAST /* One "( ... )" thunk: construction opens a new void() function and redirects the builder into it; codegen() closes it. */ +{ + static int tcount; + + std::list> body; + llvm::IRBuilderBase::InsertPoint parent; + llvm::Function *func; + + explicit ThunkAST(): ThunkAST(std::string("__t") + std::to_string(tcount++)) {} /* anonymous thunks are named __t0, __t1, ... */ + + explicit ThunkAST(std::string n): BaseAST(n) { /* saves the current insert point, then starts emitting into the new function's "entry" block */ + parent = llvmBuilder->saveIP(); + auto ftype = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false); + func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, name.c_str(), llvmModule.get()); + auto BB = llvm::BasicBlock::Create(*llvmContext, "entry", func); + llvmBuilder->SetInsertPoint(BB); + } + + llvm::Value *codegen() const override { /* terminates the thunk with `ret void`, restores the saved insert point and returns the function */ + llvmBuilder->CreateRetVoid(); + llvmBuilder->restoreIP(parent); + + return func; + } +}; +int ThunkAST::tcount = 0; + +enum class Token { +
none, + ThunkOpen, + ThunkClose, + Quote, + PopVar, + PushVar, + Var, + Number +}; + +static std::string name; /* text of the most recently extracted name/number token (written by extractName) */ +static std::list scope; /* thunks currently open, innermost last */ + +bool isname(char ch) { /* a name character is anything except whitespace and ')' */ + return !isspace(ch) && ch != ')'; +} + +std::string_view extractName(std::string_view sv) /* Copies the leading run of name characters from `sv` into the global `name` and returns the remainder of `sv`. */ +{ + name.clear(); + + while (!sv.empty()) { + const auto ch = sv.front(); + + if (isname(ch)) { + name += ch; + sv.remove_prefix(1); + } else { + break; + } + } + + return sv; +} + +std::pair nextToken(std::string_view sv) /* Skips leading whitespace and classifies the next token; returns the unconsumed rest of the line with the token kind. A ';' discards the rest of the line. */ +{ + if (sv.empty()) + return {sv, Token::none}; + + while (!sv.empty() && std::isspace(sv.front())) /* stop at the end of the view: front() on an empty view is undefined */ + sv.remove_prefix(1); + + if (sv.empty()) + return {sv, Token::none}; + + const auto ch = sv.front(); + if (ch == ';') { + return {{}, Token::none}; + } else if (ch == '(') { + return {sv.substr(1), Token::ThunkOpen}; + } else if (ch == ')') { + return {sv.substr(1), Token::ThunkClose}; + } else if (ch == '\'') { + return {extractName(sv.substr(1)), Token::Quote}; + } else if (ch == '$') { + return {extractName(sv.substr(1)), Token::PopVar}; + } else if (ch == '^') { + return {extractName(sv.substr(1)), Token::PushVar}; + } else if (isdigit(ch) || (ch == '-' && sv.size() > 1 && isdigit(sv[1]))) { + return {extractName(sv), Token::Number}; + } else if (isname(ch)) { + return {extractName(sv), Token::Var}; + } + + return {sv, Token::none}; +} + +void printToken(Token tok) /* Debug helper: writes the token kind to stdout; Token::none prints nothing. */ +{ + switch (tok) { + case Token::ThunkOpen: + std::cout << "ThunkOpen "; + break; + case Token::ThunkClose: + std::cout << "ThunkClose "; + break; + case Token::Quote: + std::cout << "Quote "; + break; + case Token::PopVar: + std::cout << "PopVar "; + break; + case Token::PushVar: + std::cout << "PushVar "; + break; + case Token::Var: + std::cout << "Var "; + break; + case Token::Number: + std::cout << "Number "; + break; + case Token::none: + //std::cout << "none "; + break; + } +} + +bool parseString(std::string_view sv) /* Tokenizes one input line and emits IR for each token as it is read; tokens outside any open thunk generate no code. Always returns true. */ +{ + do { + const auto [nsv, tok] = nextToken(sv); + + //printToken(tok); + + if (tok == Token::none && !nsv.empty()) { +
std::cerr << "unknown " << nsv << std::endl; + break; + } else { + std::unique_ptr expr; + + switch (tok) { + case Token::ThunkOpen: /* the outermost thunk becomes the function "main"; nested ones get generated names */ + if (scope.empty()) + scope.emplace_back("main"); + else + scope.emplace_back(); + llvmVars.emplace_back(); + break; + case Token::ThunkClose: + { if (scope.empty()) { std::cerr << "unmatched )" << std::endl; break; } /* nothing is open: skip the token instead of calling back() on an empty list */ + auto& thunk = scope.back(); + auto gen = thunk.codegen(); + llvmVars.pop_back(); + llvmVars.back().emplace(thunk.name, Var {gen, true}); + expr.reset(new PushAST {thunk.name}); /* a closed thunk is pushed onto the stack of its enclosing thunk, if any */ + scope.pop_back(); + } + break; + case Token::Quote: + break; + case Token::PopVar: + expr.reset(new PopAST {name}); + break; + case Token::PushVar: + expr.reset(new PushAST {name}); + break; + case Token::Var: + expr.reset(new CallAST {name}); + break; + case Token::Number: + expr.reset(new NumberAST {name}); + break; + case Token::none: + break; + } + + if (expr && !scope.empty()) { + expr->codegen(); + //scope.back().body.emplace_back().swap(expr); + } + } + + sv = nsv; + } while (!sv.empty()); + + return true; +} + +int main() /* Reads Forsp source from stdin line by line, compiles it into the module "forsp" and prints the IR to stderr. */ +{ + llvmContext = std::make_unique(); + llvmModule = std::make_unique("forsp", *llvmContext); + llvmBuilder = std::make_unique>(*llvmContext); + auto inttype = llvm::Type::getInt32Ty(*llvmContext); + auto stacktype = llvm::VectorType::get(inttype, 12, false); + auto zero = llvm::ConstantInt::get(inttype, 0); + auto zerovec = llvm::Constant::getNullValue(stacktype); /* zero initializer of the stack's own type; a one-element vector would not match the 12-element global */ + llvmStack = new llvm::GlobalVariable(*llvmModule, stacktype, false, llvm::GlobalValue::ExternalLinkage, zerovec, "stack"); + llvmSp = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::ExternalLinkage, zero, "sp"); + + llvmVars.emplace_back(); + + std::string line; + while (std::getline(std::cin, line)) { + /* getline is the loop condition, so a failed read at end of input is never parsed */ + parseString(line); + //std::cout << std::endl; + } + + //std::cout << "LLVM:" << std::endl; + llvmModule->print(llvm::errs(), nullptr); + + std::cout << std::endl; +} + diff --git a/support.c b/support.c new file mode 100644 index 0000000..cc0304f ---
/dev/null +++ b/support.c @@ -0,0 +1,35 @@ +#include +#include + +extern int32_t sp; +extern int32_t stack; /* first cell of the compiler-emitted "stack" global; the helpers below index from its address */ + +void emit() /* pops the top cell and writes it to stdout with putchar */ +{ + putchar(*(&stack + --sp)); +} + +void sub() /* replaces the top two cells with (second - top) */ +{ + int32_t *st = &stack; + st[sp - 2] -= st[sp - 1]; + --sp; +} + +void cswap() /* pops a condition; when it is non-zero, swaps the two cells beneath it */ +{ + int32_t *st = &stack; + --sp; + if (st[sp]) { + int32_t tmp = st[sp - 1]; + st[sp - 1] = st[sp - 2]; + st[sp - 2] = tmp; + } +} + +void eq() /* pops the top cell and overwrites the new top with 1 if the two were equal, else 0 */ +{ + int32_t *st = &stack; + --sp; + st[sp - 1] = st[sp - 1] == st[sp]; +} diff --git a/test.fp b/test.fp new file mode 100644 index 0000000..015f2ee --- /dev/null +++ b/test.fp @@ -0,0 +1,29 @@ +( + ; core utilities + ($x ^x ^x) $dup + ($_) $drop + ($x $y ^x ^y) $swap + ($a $b $c ^b ^a ^c) $rot + (sub) $- + (0 swap - -) $+ + (()) $nil + (nil eq) $null? + ($x x) $force + (10 emit) $cr + + ; recursion via y-combinator + ($f ($x (^x x) f) dup force) $Y ($g (^g Y)) $rec + + ; if-stmt + ($c $t $f c ^f ^t rot cswap $_ force) $if + ($f $t $c $fn ^f ^t ^c fn) $endif + + ; range + ($self $body $start $end + ^if (^start ^end eq) nil + (^start body ^end ^start 1 + ^body self) + endif + ) rec $do + + 70 65 ^emit do cr +)