break code into separate files

main
Clyne 6 months ago
parent 47d7c964d0
commit 0ad40ced64
Signed by: clyne
GPG Key ID: 1B74EE6C49C96795

5
.gitignore vendored

@ -0,0 +1,5 @@
a.out
main
*.ir
*.o
*.sw*

@ -1,7 +1,11 @@
CXXFILES := main.cpp parser.cpp llvm.cpp ast.cpp var.cpp
CXXFLAGS := `llvm-config --cxxflags` -std=c++20 -ggdb -O0 -g3
LDFLAGS := `llvm-config --ldflags --system-libs --libs core`
all: main all: main
main: main.cpp main: $(subst .cpp,.o,$(CXXFILES))
$(CXX) -o $@ $^ `llvm-config --cxxflags --ldflags --system-libs --libs core` -std=c++20 -ggdb -O0 -g3 $(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
prog: main test.fp prog: main test.fp
./main < test.fp 2> forsp.ir ./main < test.fp 2> forsp.ir

@ -1,6 +1,23 @@
# forspll # forspll
forspll is an implementation of the [Forsp](https://github.com/xorvoid/forsp) language as an LLVM-based compiler. forspll is an implementation of [Forsp](https://xorvoid.com/forsp.html) as an LLVM-based compiler. Forsp is a tiny yet very versatile programming language that mixes features of Forth and Lisp. Through LLVM, Forsp can be compiled into efficient machine code for a wide array of platforms.
TODO forspll features:
* Lisp-style S-expression syntax
* Forth-style data stack for parameters/values
* Linking with C functions (see `support.c`)
Missing features:
* Quote operator: `quote`/`'`
* Lists and atoms or any dynamic allocations
## Building
Requires Clang and LLVM development files.
Run `make` to build the compiler.
Run `make prog` to compile `test.fp` and `support.c`.

@ -0,0 +1,78 @@
#include "ast.hpp"
// Running counter used to name anonymous thunks: the one-argument ThunkAST
// constructor appends its current value to "__t" and then increments it.
int ThunkAST::tcount = 0;
// Keeps the literal's source text; BaseAST stores it in `name`.
NumberAST::NumberAST(const std::string& n)
    : BaseAST(n)
{
}

// Emits IR that pushes this numeric literal onto the data stack.
//
// The text in `name` is converted with std::stoi before any IR is emitted,
// then stored into the slot handed out by LLVMState::createPush().
// Returns the store instruction.
llvm::Value *NumberAST::codegen(LLVMState& llvmState) const
{
    llvm::Constant *literal = llvmState.createInt(std::stoi(name));
    llvm::Value *slot = llvmState.createPush();
    return llvmState.builder.CreateStore(literal, slot);
}
// Keeps the referenced name; BaseAST stores it in `name`.
PushAST::PushAST(const std::string& n)
    : BaseAST(n)
{
}

// Emits IR that pushes the value bound to `name` onto the data stack.
//
// Returns nullptr (emitting nothing) when the name is not bound in any
// scope. Callable bindings are stored as-is; any other binding is treated as
// the address of an integer and is loaded first. Returns the store
// instruction on success.
llvm::Value *PushAST::codegen(LLVMState& llvmState) const
{
    const Var binding = Var::lookup(name);
    if (!binding.value)
        return nullptr;

    // Reserve the stack slot first so the slot-reservation IR precedes the load.
    llvm::Value *slot = llvmState.createPush();

    llvm::Value *payload = binding.value;
    if (!binding.callable)
        payload = llvmState.builder.CreateLoad(llvmState.inttype, binding.value);

    return llvmState.builder.CreateStore(payload, slot);
}
// Keeps the target variable name; BaseAST stores it in `name`.
PopAST::PopAST(const std::string& n)
    : BaseAST(n)
{
}

// Emits IR that pops the top of the data stack into a variable called `name`.
//
// A fresh module-level variable is created for the name, the popped value is
// copied into it, and the variable is registered in the innermost scope.
// Returns the value that Var::addLocal reports for the name.
llvm::Value *PopAST::codegen(LLVMState& llvmState) const
{
    llvm::Value *slot = llvmState.createPop();
    llvm::Value *storage = llvmState.createVariable(name);

    llvm::Value *popped = llvmState.builder.CreateLoad(llvmState.inttype, slot);
    llvmState.builder.CreateStore(popped, storage, false);

    Var& entry = Var::addLocal(name, storage);
    return entry.value;
}
// Keeps the callee name; BaseAST stores it in `name`.
CallAST::CallAST(const std::string& n)
    : BaseAST(n)
{
}

// Emits a call to whatever `name` refers to and returns the call instruction.
//
// Three cases:
//  - the name is unbound: a function with that name is created in the
//    module, recorded as a callable global, and called;
//  - the name is bound to a callable: it is called directly;
//  - the name is bound to a plain variable: its integer value is loaded,
//    converted to a pointer and called through that pointer.
llvm::Value *CallAST::codegen(LLVMState& llvmState) const
{
    auto& builder = llvmState.builder;
    const Var target = Var::lookup(name);

    if (!target.value) {
        llvm::Function *declared = llvmState.createFunction(name);
        Var::addGlobal(name, Var {declared, true});
        return builder.CreateCall(llvmState.ftype, declared);
    }

    if (target.callable)
        return builder.CreateCall(llvmState.ftype, target.value);

    llvm::Value *raw = builder.CreateLoad(llvmState.inttype, target.value);
    llvm::Value *callee = builder.CreateIntToPtr(raw,
        llvmState.inttype->getPointerTo());
    return builder.CreateCall(llvmState.ftype, callee);
}
// Opens an anonymous thunk named "__t<N>", where N is the current value of
// the shared counter `tcount` (post-incremented here).
ThunkAST::ThunkAST(LLVMState& llvmState)
    : ThunkAST(llvmState, "__t" + std::to_string(tcount++))
{
}

// Opens a thunk called `n`.
//
// Remembers the builder's current insertion point, creates a function for
// the thunk and redirects the builder to that function's entry block, so IR
// emitted afterwards lands inside the thunk until codegen() is called.
ThunkAST::ThunkAST(LLVMState& llvmState, std::string n)
    : BaseAST(n),
      parent(llvmState.builder.saveIP()),
      func(llvmState.createFunction(name))
{
    llvmState.builder.SetInsertPoint(llvmState.createEntry(func));
}

// Closes the thunk: terminates its function with `ret void`, moves the
// builder back to the insertion point saved by the constructor, and returns
// the thunk's function.
llvm::Value *ThunkAST::codegen(LLVMState& llvmState) const
{
    auto& builder = llvmState.builder;
    builder.CreateRetVoid();
    builder.restoreIP(parent);
    return func;
}

@ -0,0 +1,63 @@
#ifndef FORSPLL_AST_HPP
#define FORSPLL_AST_HPP
#include "llvm.hpp"
#include "var.hpp"
#include <list>
#include <memory>
#include <string>
// Common base of every AST node.
// Each node carries one piece of text (`name`) and knows how to emit LLVM IR
// for itself through the pure-virtual codegen() hook.
struct BaseAST
{
// Text associated with the node (a literal, an identifier or a thunk name).
std::string name;
BaseAST(const std::string& n): name(n) {}
// Virtual so nodes can be destroyed through a BaseAST pointer.
virtual ~BaseAST() = default;
// Emits IR for the node using the given state and returns a resulting value.
virtual llvm::Value *codegen(LLVMState&) const = 0;
};
// Numeric literal node.
struct NumberAST : public BaseAST
{
// `n` is the literal's source text.
// codegen() pushes the number onto the data stack.
explicit NumberAST(const std::string& n);
llvm::Value *codegen(LLVMState& llvmState) const override;
};
// Node that pushes a named value.
struct PushAST : public BaseAST
{
// `n` is the name to look up.
// codegen() pushes the value bound to that name onto the data stack.
explicit PushAST(const std::string& n);
llvm::Value *codegen(LLVMState& llvmState) const override;
};
// Node that binds a name to the value on top of the stack.
struct PopAST : public BaseAST
{
// `n` is the variable name.
// codegen() pops the top of the data stack into a variable with that name.
explicit PopAST(const std::string& n);
llvm::Value *codegen(LLVMState& llvmState) const override;
};
// Node that calls something by name.
struct CallAST : public BaseAST
{
// `n` is the name of the thing to invoke.
// codegen() emits a call to the invocable bound to that name.
explicit CallAST(const std::string& n);
llvm::Value *codegen(LLVMState& llvmState) const override;
};
// Node for a thunk, i.e. a parenthesised group compiled to its own function.
// Constructing a ThunkAST takes an LLVMState because it starts emitting the
// function immediately; codegen() finishes it.
struct ThunkAST : public BaseAST
{
// Counter used to generate names for anonymous thunks.
static int tcount;
// Child nodes of the thunk.
// NOTE(review): nothing visible here fills this list -- confirm it is still needed.
std::list<std::unique_ptr<BaseAST>> body;
// Builder insertion point to return to once the thunk is finished.
llvm::IRBuilderBase::InsertPoint parent;
// LLVM function that holds the thunk's code.
llvm::Function *func;
// Anonymous thunk with a generated name.
explicit ThunkAST(LLVMState& llvmState);
// Thunk with the explicit name `n`.
explicit ThunkAST(LLVMState& llvmState, std::string n);
llvm::Value *codegen(LLVMState& llvmState) const override;
};
#endif // FORSPLL_AST_HPP

@ -0,0 +1,64 @@
#include "llvm.hpp"
// Sets up the LLVM context, the "forsp" module, the IR builder and the types
// and constants shared by all code generation, then creates the two globals
// that form the data stack:
//   - "sp":    a 32-bit integer stack index, initialised to 0;
//   - "stack": a 12-element vector of 32-bit integers, zero-initialised.
// Both globals have external linkage. They are allocated with `new` because
// passing the module to the GlobalVariable constructor inserts them into it.
LLVMState::LLVMState():
    ctx(),
    modul("forsp", ctx),
    builder(ctx),
    inttype(llvm::Type::getInt32Ty(ctx)),
    stacktype(llvm::VectorType::get(inttype, 12, false)),
    ftype(llvm::FunctionType::get(llvm::Type::getVoidTy(ctx), {}, false)),
    one(llvm::ConstantInt::get(inttype, 1)),
    zero(llvm::ConstantInt::get(inttype, 0))
{
    // A global's initializer must have exactly the global's value type. The
    // previous one-element constant vector did not match the 12-element
    // `stacktype`; ask for the zero value of `stacktype` itself instead.
    auto zerostack = llvm::Constant::getNullValue(stacktype);

    llvmSp = new llvm::GlobalVariable(modul, inttype, false,
        llvm::GlobalValue::ExternalLinkage, zero, "sp");
    llvmStack = new llvm::GlobalVariable(modul, stacktype, false,
        llvm::GlobalValue::ExternalLinkage, zerostack, "stack");
}
// Emits the IR for reserving one slot on the data stack.
//
// Loads the current stack index, writes index + 1 back to "sp", and returns
// the address of the element at the *old* index; the caller stores the
// pushed value there.
llvm::Value *LLVMState::createPush()
{
    llvm::Value *oldIndex = builder.CreateLoad(inttype, llvmSp);
    llvm::Value *newIndex = builder.CreateAdd(oldIndex, one);
    builder.CreateStore(newIndex, llvmSp, false);

    llvm::Value *path[] = {zero, oldIndex};
    return builder.CreateGEP(stacktype, llvmStack, path);
}
// Emits the IR for releasing the top slot of the data stack.
//
// Loads the current stack index, writes index - 1 back to "sp", and returns
// the address of the element at the *new* index; the caller loads the
// popped value from there.
llvm::Value *LLVMState::createPop()
{
    llvm::Value *oldIndex = builder.CreateLoad(inttype, llvmSp);
    llvm::Value *newIndex = builder.CreateSub(oldIndex, one);
    builder.CreateStore(newIndex, llvmSp, false);

    llvm::Value *path[] = {zero, newIndex};
    return builder.CreateGEP(stacktype, llvmStack, path);
}
// Creates a function called `name` in the module and returns it.
// The function uses the shared `ftype` signature and has external linkage.
llvm::Function *LLVMState::createFunction(const std::string& name)
{
return llvm::Function::Create(ftype, llvm::Function::ExternalLinkage,
name.c_str(), modul);
}
// Creates a basic block labelled "entry" inside `func` and returns it.
llvm::BasicBlock *LLVMState::createEntry(llvm::Function *func)
{
return llvm::BasicBlock::Create(ctx, "entry", func);
}
// Creates a module-level variable called `name` and returns it.
// The variable has type `inttype`, is not constant, has internal linkage and
// starts out as zero. `new` is used because passing the module to the
// constructor inserts the variable into it.
llvm::Value *LLVMState::createVariable(const std::string& name)
{
return new llvm::GlobalVariable(modul, inttype, false,
llvm::GlobalValue::InternalLinkage, zero, name);
}
// Returns a 32-bit integer constant with the value `n`.
// The APInt is built with isSigned = true so negative values are accepted.
llvm::Constant *LLVMState::createInt(int n)
{
return llvm::ConstantInt::get(ctx, llvm::APInt(32, n, true));
}
// Prints the module's textual IR to llvm::errs().
void LLVMState::output()
{
//std::cout << "LLVM:" << std::endl;
modul.print(llvm::errs(), nullptr);
}

@ -0,0 +1,41 @@
#ifndef FORSPLL_LLVM_HPP
#define FORSPLL_LLVM_HPP
#include <llvm/ADT/APInt.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/DerivedTypes.h>
#include <string>
// Bundles everything needed to emit LLVM IR for one program: the context,
// the module, the builder, a few shared types and constants, and the two
// globals that implement the data stack.
// Member order matters: the constructor's initializer list uses earlier
// members (e.g. `ctx`, `inttype`) to build later ones.
struct LLVMState
{
llvm::LLVMContext ctx;
// Module that receives all generated code.
llvm::Module modul;
llvm::IRBuilder<> builder;
// 32-bit integer type used for stack values and variables.
llvm::Type *inttype;
// Type of the data stack: a 12-element vector of `inttype`.
llvm::Type *stacktype;
// Signature shared by all generated functions: no parameters, returns void.
llvm::FunctionType *ftype;
// `inttype` constants 1 and 0.
llvm::Constant *one;
llvm::Constant *zero;
// Global "sp": current stack index.
llvm::Constant *llvmSp;
// Global "stack": storage for the data stack.
llvm::Constant *llvmStack;
LLVMState();
// Emit IR reserving / releasing one stack slot; both return the slot address.
llvm::Value *createPush();
llvm::Value *createPop();
// Create a function with signature `ftype` and external linkage.
llvm::Function *createFunction(const std::string& name);
// Create the "entry" basic block of `func`.
llvm::BasicBlock *createEntry(llvm::Function *func);
// Create a zero-initialised module-level integer variable.
llvm::Value *createVariable(const std::string& name);
// Create a 32-bit integer constant.
llvm::Constant *createInt(int n);
// Print the module's IR.
void output();
};
#endif // FORSPLL_LLVM_HPP

@ -3,281 +3,34 @@
#include <iostream> #include <iostream>
#include <list> #include <list>
#include <map> #include <map>
#include <memory>
#include <set>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <tuple> #include <tuple>
#include <llvm/ADT/APInt.h> #include "ast.hpp"
#include <llvm/IR/Constants.h> #include "llvm.hpp"
#include <llvm/IR/IRBuilder.h> #include "parser.hpp"
#include <llvm/IR/LLVMContext.h> #include "var.hpp"
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/DerivedTypes.h>
static std::unique_ptr<llvm::LLVMContext> llvmContext; static LLVMState llvmState;
static std::unique_ptr<llvm::Module> llvmModule;
static std::unique_ptr<llvm::IRBuilder<>> llvmBuilder;
static llvm::Constant *llvmSp;
static llvm::Constant *llvmStack;
struct Var {
llvm::Value *value;
bool callable;
Var(llvm::Value *v = nullptr, bool c = false): value(v), callable(c) {}
};
static std::list<std::map<std::string, Var>> llvmVars;
Var llvmVarGet(const std::string& name, int skip = 0) {
for (auto sc = llvmVars.rbegin(); sc != llvmVars.rend(); ++sc) {
if (skip > 0) {
--skip;
continue;
}
if (sc->contains(name))
return (*sc)[name];
}
return {};
}
struct BaseAST
{
std::string name;
BaseAST(const std::string& n): name(n) {}
virtual ~BaseAST() = default;
virtual llvm::Value *codegen() const { return nullptr; }
};
struct NumberAST : public BaseAST
{
// push number onto stack
explicit NumberAST(const std::string& n): BaseAST(n) {}
llvm::Value *codegen() const override {
auto inttype = llvm::Type::getInt32Ty(*llvmContext);
auto stacktype = llvm::VectorType::get(inttype, 12, false);
auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp);
auto one = llvm::ConstantInt::get(inttype, 1);
auto zero = llvm::ConstantInt::get(inttype, 0);
auto inc = llvmBuilder->CreateAdd(dspval, one);
llvmBuilder->CreateStore(inc, llvmSp, false);
auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval});
auto val = llvm::ConstantInt::get(*llvmContext, llvm::APInt(32, std::stoi(name), true));
return llvmBuilder->CreateStore(val, dsget);
}
};
struct PushAST : public BaseAST
{
// push named value to stack
explicit PushAST(const std::string& n): BaseAST(n) {}
llvm::Value *codegen() const override {
if (auto [var, thunk] = llvmVarGet(name); var) {
auto inttype = llvm::Type::getInt32Ty(*llvmContext);
auto stacktype = llvm::VectorType::get(inttype, 12, false);
auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp);
auto one = llvm::ConstantInt::get(inttype, 1);
auto zero = llvm::ConstantInt::get(inttype, 0);
auto inc = llvmBuilder->CreateAdd(dspval, one);
llvmBuilder->CreateStore(inc, llvmSp, false);
auto dsget = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dspval});
if (!thunk)
var = llvmBuilder->CreateLoad(inttype, var);
return llvmBuilder->CreateStore(var, dsget);
} else {
return nullptr;
}
}
};
struct PopAST : public BaseAST
{
// pop value on stack to named var
explicit PopAST(const std::string& n): BaseAST(n) {}
llvm::Value *codegen() const override {
auto inttype = llvm::Type::getInt32Ty(*llvmContext);
auto stacktype = llvm::VectorType::get(inttype, 12, false);
auto one = llvm::ConstantInt::get(inttype, 1);
auto zero = llvm::ConstantInt::get(inttype, 0);
auto dspval = llvmBuilder->CreateLoad(inttype, llvmSp);
auto dec = llvmBuilder->CreateSub(dspval, one);
auto gep = llvmBuilder->CreateGEP(stacktype, llvmStack, {zero, dec});
llvmBuilder->CreateStore(dec, llvmSp, false);
auto var = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::InternalLinkage, zero, name);
llvmBuilder->CreateStore(llvmBuilder->CreateLoad(inttype, gep), var, false);
auto [it, _] = llvmVars.back().emplace(name, var);
return it->second.value;
}
};
struct CallAST : public BaseAST
{
// invoke named invocable
explicit CallAST(const std::string& n): BaseAST(n) {}
llvm::Value *codegen() const override {
auto ftype = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false);
if (auto [var, call] = llvmVarGet(name); var) {
if (call) {
return llvmBuilder->CreateCall(ftype, var);
} else {
auto inttype = llvm::Type::getInt32Ty(*llvmContext);
auto val = llvmBuilder->CreateLoad(inttype, var);
auto cast = llvmBuilder->CreateIntToPtr(val, inttype->getPointerTo());
return llvmBuilder->CreateCall(ftype, cast);
}
} else {
auto func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, name, llvmModule.get());
llvmVars.front().emplace(name, Var {func, true});
return llvmBuilder->CreateCall(ftype, func);
}
}
};
struct ThunkAST : public BaseAST
{
static int tcount;
std::list<std::unique_ptr<BaseAST>> body;
llvm::IRBuilderBase::InsertPoint parent;
llvm::Function *func;
explicit ThunkAST(): ThunkAST(std::string("__t") + std::to_string(tcount++)) {}
explicit ThunkAST(std::string n): BaseAST(n) {
parent = llvmBuilder->saveIP();
auto ftype = llvm::FunctionType::get(llvm::Type::getVoidTy(*llvmContext), {}, false);
func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, name.c_str(), llvmModule.get());
auto BB = llvm::BasicBlock::Create(*llvmContext, "entry", func);
llvmBuilder->SetInsertPoint(BB);
}
llvm::Value *codegen() const override {
llvmBuilder->CreateRetVoid();
llvmBuilder->restoreIP(parent);
return func;
}
};
int ThunkAST::tcount = 0;
enum class Token {
none,
ThunkOpen,
ThunkClose,
Quote,
PopVar,
PushVar,
Var,
Number
};
static std::string name;
static std::list<ThunkAST> scope; static std::list<ThunkAST> scope;
bool isname(char ch) { static bool parseString(std::string_view sv);
return !isspace(ch) && ch != ')';
}
std::string_view extractName(std::string_view sv)
{
name.clear();
while (!sv.empty()) {
const auto ch = sv.front();
if (isname(ch)) {
name += ch;
sv.remove_prefix(1);
} else {
break;
}
}
return sv;
}
std::pair<std::string_view, Token> nextToken(std::string_view sv) int main()
{ {
if (sv.empty()) Var::pushScope();
return {sv, Token::none};
while (std::isspace(sv.front()))
sv.remove_prefix(1);
if (sv.empty())
return {sv, Token::none};
const auto ch = sv.front(); std::string line;
if (ch == ';') { while (std::cin.good()) {
return {{}, Token::none}; std::getline(std::cin, line);
} else if (ch == '(') { parseString(line);
return {sv.substr(1), Token::ThunkOpen}; //std::cout << std::endl;
} else if (ch == ')') {
return {sv.substr(1), Token::ThunkClose};
} else if (ch == '\'') {
return {extractName(sv.substr(1)), Token::Quote};
} else if (ch == '$') {
return {extractName(sv.substr(1)), Token::PopVar};
} else if (ch == '^') {
return {extractName(sv.substr(1)), Token::PushVar};
} else if (isdigit(ch) || (ch == '-' && sv.size() > 1 && isdigit(sv[1]))) {
return {extractName(sv), Token::Number};
} else if (isname(ch)) {
return {extractName(sv), Token::Var};
} }
return {sv, Token::none}; llvmState.output();
}
void printToken(Token tok) std::cout << std::endl;
{
switch (tok) {
case Token::ThunkOpen:
std::cout << "ThunkOpen ";
break;
case Token::ThunkClose:
std::cout << "ThunkClose ";
break;
case Token::Quote:
std::cout << "Quote ";
break;
case Token::PopVar:
std::cout << "PopVar ";
break;
case Token::PushVar:
std::cout << "PushVar ";
break;
case Token::Var:
std::cout << "Var ";
break;
case Token::Number:
std::cout << "Number ";
break;
case Token::none:
//std::cout << "none ";
break;
}
} }
bool parseString(std::string_view sv) bool parseString(std::string_view sv)
@ -296,17 +49,17 @@ bool parseString(std::string_view sv)
switch (tok) { switch (tok) {
case Token::ThunkOpen: case Token::ThunkOpen:
if (scope.empty()) if (scope.empty())
scope.emplace_back("main"); scope.emplace_back(llvmState, "main");
else else
scope.emplace_back(); scope.emplace_back(llvmState);
llvmVars.emplace_back(); Var::pushScope();
break; break;
case Token::ThunkClose: case Token::ThunkClose:
{ {
auto& thunk = scope.back(); auto& thunk = scope.back();
auto gen = thunk.codegen(); auto gen = thunk.codegen(llvmState);
llvmVars.pop_back(); Var::popScope();
llvmVars.back().emplace(thunk.name, Var {gen, true}); Var::addLocal(thunk.name, Var {gen, true});
expr.reset(new PushAST {thunk.name}); expr.reset(new PushAST {thunk.name});
scope.pop_back(); scope.pop_back();
} }
@ -330,7 +83,7 @@ bool parseString(std::string_view sv)
} }
if (expr && !scope.empty()) { if (expr && !scope.empty()) {
expr->codegen(); expr->codegen(llvmState);
//scope.back().body.emplace_back().swap(expr); //scope.back().body.emplace_back().swap(expr);
} }
} }
@ -341,30 +94,3 @@ bool parseString(std::string_view sv)
return true; return true;
} }
int main()
{
llvmContext = std::make_unique<llvm::LLVMContext>();
llvmModule = std::make_unique<llvm::Module>("forsp", *llvmContext);
llvmBuilder = std::make_unique<llvm::IRBuilder<>>(*llvmContext);
auto inttype = llvm::Type::getInt32Ty(*llvmContext);
auto stacktype = llvm::VectorType::get(inttype, 12, false);
auto zero = llvm::ConstantInt::get(inttype, 0);
auto zerovec = llvm::ConstantVector::get(llvm::ArrayRef(dynamic_cast<llvm::Constant *>(zero)));
llvmStack = new llvm::GlobalVariable(*llvmModule, stacktype, false, llvm::GlobalValue::ExternalLinkage, zerovec, "stack");
llvmSp = new llvm::GlobalVariable(*llvmModule, inttype, false, llvm::GlobalValue::ExternalLinkage, zero, "sp");
llvmVars.emplace_back();
std::string line;
while (std::cin.good()) {
std::getline(std::cin, line);
parseString(line);
//std::cout << std::endl;
}
//std::cout << "LLVM:" << std::endl;
llvmModule->print(llvm::errs(), nullptr);
std::cout << std::endl;
}

@ -0,0 +1,92 @@
#include "parser.hpp"
#include <cctype>
#include <iostream>
// Text of the most recently extracted name or number token.
// extractName() clears and refills it on every call.
std::string name;
// Returns true when `ch` may appear inside a name token, i.e. when it is
// neither whitespace nor the thunk terminator ')'.
static bool isname(char ch) {
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF). Plain char may be negative for non-ASCII
    // bytes, so convert before classifying.
    return !std::isspace(static_cast<unsigned char>(ch)) && ch != ')';
}
// Reads the longest prefix of `sv` made of name characters (see isname())
// into the global `name`, replacing its previous contents, and returns the
// part of `sv` that follows that prefix. When `sv` does not start with a
// name character, `name` ends up empty and `sv` is returned unchanged.
static std::string_view extractName(std::string_view sv)
{
    std::string_view::size_type length = 0;
    while (length < sv.size() && isname(sv[length]))
        ++length;

    name.assign(sv.substr(0, length));
    return sv.substr(length);
}
// Splits the next token off the front of `sv`.
//
// Returns the unconsumed remainder of the input together with the token
// kind. For Quote, PopVar, PushVar, Number and Var tokens the token text is
// left in the global `name` by extractName(). Token::none is returned when
// the input is empty or holds only whitespace, and when a ';' is reached; in
// the ';' case the returned remainder is empty, so the rest of the input is
// dropped.
std::pair<std::string_view, Token> nextToken(std::string_view sv)
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char, so convert plain char before classifying.
    const auto isSpace = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };
    const auto isDigit = [](char c) {
        return std::isdigit(static_cast<unsigned char>(c)) != 0;
    };

    // Check for emptiness before every front(): an all-whitespace input is
    // consumed completely by this loop, and front() on an empty view is
    // undefined behaviour.
    while (!sv.empty() && isSpace(sv.front()))
        sv.remove_prefix(1);
    if (sv.empty())
        return {sv, Token::none};

    const char ch = sv.front();
    switch (ch) {
    case ';':
        return {{}, Token::none};
    case '(':
        return {sv.substr(1), Token::ThunkOpen};
    case ')':
        return {sv.substr(1), Token::ThunkClose};
    case '\'':
        return {extractName(sv.substr(1)), Token::Quote};
    case '$':
        return {extractName(sv.substr(1)), Token::PopVar};
    case '^':
        return {extractName(sv.substr(1)), Token::PushVar};
    default:
        break;
    }

    // A number starts with a digit, or with '-' immediately followed by one.
    const bool negative = ch == '-' && sv.size() > 1 && isDigit(sv[1]);
    if (isDigit(ch) || negative)
        return {extractName(sv), Token::Number};
    if (isname(ch))
        return {extractName(sv), Token::Var};
    return {sv, Token::none};
}
// Debug helper: writes the token kind followed by a space to std::cout.
// Nothing is written for Token::none.
void printToken(Token tok)
{
    const char *label = nullptr;

    switch (tok) {
    case Token::ThunkOpen:  label = "ThunkOpen ";  break;
    case Token::ThunkClose: label = "ThunkClose "; break;
    case Token::Quote:      label = "Quote ";      break;
    case Token::PopVar:     label = "PopVar ";     break;
    case Token::PushVar:    label = "PushVar ";    break;
    case Token::Var:        label = "Var ";        break;
    case Token::Number:     label = "Number ";     break;
    case Token::none:
        //std::cout << "none ";
        break;
    }

    if (label)
        std::cout << label;
}

@ -0,0 +1,25 @@
#ifndef FORSPLL_PARSER_HPP
#define FORSPLL_PARSER_HPP
#include <string>
#include <string_view>
#include <tuple>
// Kinds of token produced by nextToken().
enum class Token {
// No token: empty input, only whitespace, or a ';' was reached.
none,
// '('
ThunkOpen,
// ')'
ThunkClose,
// '\'' followed by a name
Quote,
// '$' followed by a name
PopVar,
// '^' followed by a name
PushVar,
// A bare name
Var,
// A run starting with a digit, or with '-' followed by a digit
Number
};
// Text of the most recently extracted name or number token.
extern std::string name;
// Splits the next token off the front of `sv`; returns the remaining input
// and the token kind.
std::pair<std::string_view, Token> nextToken(std::string_view sv);
// Debug helper: prints the token kind to std::cout.
void printToken(Token tok);
#endif // FORSPLL_PARSER_HPP

@ -0,0 +1,42 @@
#include "var.hpp"
#include <list>
#include <map>
#include <string>
// Stack of scopes, outermost first. Each scope maps a name to its Var.
// front() is used as the global scope and back() as the innermost one.
static std::list<std::map<std::string, Var>> llvmVars;
// Finds the binding for `name`, searching from the innermost scope outwards.
//
// `skip` is the number of innermost scopes to ignore before searching.
// Returns a copy of the first binding found, or a default-constructed Var
// (null value, not callable) when the name is not bound in any searched
// scope.
Var Var::lookup(const std::string& name, int skip)
{
    for (auto sc = llvmVars.rbegin(); sc != llvmVars.rend(); ++sc) {
        if (skip > 0) {
            --skip;
            continue;
        }

        // One find() instead of contains() + operator[]: a single lookup,
        // and no use of a mutating accessor on a read-only path.
        if (const auto hit = sc->find(name); hit != sc->end())
            return hit->second;
    }

    return {};
}
// Opens a new, empty innermost scope.
void Var::pushScope()
{
llvmVars.emplace_back();
}
// Discards the innermost scope together with its bindings.
// Must only be called while at least one scope exists: pop_back() on an
// empty list is undefined behaviour.
void Var::popScope()
{
llvmVars.pop_back();
}
// Binds `name` to `var` in the outermost (global) scope and returns a
// reference to the stored binding. emplace() does not overwrite: when the
// name is already bound there, the existing binding is kept and returned.
// Requires at least one scope to exist (front() on an empty list is
// undefined behaviour).
Var& Var::addGlobal(const std::string& name, Var var)
{
return llvmVars.front().emplace(name, var).first->second;
}
// Binds `name` to `var` in the innermost scope and returns a reference to
// the stored binding.
//
// Binding a name that already exists in this scope replaces the old
// binding. With emplace() the old binding was silently kept, so a value
// assigned to the same name a second time was never seen by later lookups.
// Requires at least one scope to exist (back() on an empty list is
// undefined behaviour).
Var& Var::addLocal(const std::string& name, Var var)
{
    return llvmVars.back().insert_or_assign(name, var).first->second;
}

@ -0,0 +1,22 @@
#ifndef FORSPLL_VAR_HPP
#define FORSPLL_VAR_HPP
#include <llvm/IR/Type.h>
#include <llvm/IR/DerivedTypes.h>
// A name binding: the LLVM value a name refers to, plus whether that value
// can be called directly. The static members manage the stack of scopes in
// which bindings are stored.
struct Var {
// Value the name refers to; nullptr means "no binding".
llvm::Value *value;
// True when `value` can be called directly.
bool callable;
// Intentionally not explicit: callers pass a bare llvm::Value* where a Var
// is expected.
Var(llvm::Value *v = nullptr, bool c = false):
value(v), callable(c) {}
// Finds `name`, searching innermost scope first and ignoring the `skip`
// innermost scopes; returns a Var with a null value when not found.
static Var lookup(const std::string& name, int skip = 0);
// Open / close the innermost scope.
static void pushScope();
static void popScope();
// Bind `name` in the outermost scope.
static Var& addGlobal(const std::string& name, Var var);
// Bind `name` in the innermost scope.
static Var& addLocal(const std::string& name, Var var);
};
#endif // FORSPLL_VAR_HPP
Loading…
Cancel
Save