|
|
|
/**
|
|
|
|
* lisp-compiler: Compiles LISP using LLVM.
|
|
|
|
* Copyright (C) 2022 Clyne Sullivan
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify it under
|
|
|
|
* the terms of the GNU General Public License as published by the Free Software
|
|
|
|
* Foundation, either version 3 of the License, or (at your option) any later
|
|
|
|
* version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
|
|
* details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along with
|
|
|
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
* @file parser.cpp
|
|
|
|
* @brief Source code parser to produce AST.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "parser.hpp"

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <limits>
#include <vector>
|
|
|
|
|
|
|
|
void Parser::addString(const std::string& str)
|
|
|
|
{
|
|
|
|
if (!str.empty())
|
|
|
|
std::copy(str.cbegin(), str.cend(), std::back_inserter(text));
|
|
|
|
}
|
|
|
|
|
|
|
|
void Parser::consumeWhitespace() noexcept
|
|
|
|
{
|
|
|
|
while (isspace(text.front()))
|
|
|
|
text.pop_front();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::optional<std::string> Parser::consumeIdentifier() noexcept
|
|
|
|
{
|
|
|
|
std::string ret;
|
|
|
|
|
|
|
|
// TODO Accept all valid identifiers according to R7RS-small.
|
|
|
|
if (isalpha(text.front())) {
|
|
|
|
do {
|
|
|
|
ret += text.front();
|
|
|
|
text.pop_front();
|
|
|
|
} while (isalnum(text.front()) || text.front() == '!');
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret.empty())
|
|
|
|
return {};
|
|
|
|
else
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::optional<std::variant<int, double>> Parser::consumeLiteralNumber()
|
|
|
|
{
|
|
|
|
std::string ret;
|
|
|
|
|
|
|
|
while (isdigit(text.front()) || text.front() == '.') {
|
|
|
|
ret += text.front();
|
|
|
|
text.pop_front();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret.empty())
|
|
|
|
return {};
|
|
|
|
|
|
|
|
if (ret.find('.') == std::string::npos) {
|
|
|
|
int r = strtol(ret.c_str(), nullptr, 0);
|
|
|
|
// TODO Error check
|
|
|
|
return r;
|
|
|
|
} else {
|
|
|
|
auto r = strtod(ret.c_str(), nullptr);
|
|
|
|
// TODO Error check
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::deque<AST::Node *> Parser::parse()
|
|
|
|
{
|
|
|
|
std::deque<AST::Node *> ret;
|
|
|
|
|
|
|
|
errors.clear();
|
|
|
|
while (!text.empty()) {
|
|
|
|
consumeWhitespace();
|
|
|
|
|
|
|
|
// At the top-level, there will only be procedure calls.
|
|
|
|
auto node = parseProcedureCall();
|
|
|
|
if (errors.empty() && node) {
|
|
|
|
ret.push_back(node);
|
|
|
|
} else {
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * @brief Parses a single expression from the front of `text`.
 *
 * Tries, in order: a parenthesised form (delegated to parseProcedureCall()),
 * an identifier, and a numeric literal.
 *
 * @return A newly allocated node, or nullptr on failure. When nothing
 *         matches (including empty input) InvalidExpression is recorded; a
 *         failed parenthesised form only carries the errors recorded by
 *         parseProcedureCall().
 */
AST::Node *Parser::parseExpression()
{
    // Guard against running out of input: front() on an empty container is
    // undefined.
    if (!text.empty()) {
        if (text.front() == '(')
            return parseProcedureCall();

        if (auto id = consumeIdentifier(); id) {
            auto ident = new AST::Identifier;
            ident->name = *id;
            return ident;
        }

        if (auto d = consumeLiteralNumber(); d) {
            auto lit = new AST::Literal;
            lit->type = AST::Literal::Number;
            lit->value = *d;
            return lit;
        }
    }

    errors.push_back(InvalidExpression);
    return nullptr;
}
|
|
|
|
|
|
|
|
/**
 * @brief Parses a parenthesised form starting at the front of `text`.
 *
 * After the opening '(' the callee expression is parsed. If the callee is one
 * of the identifiers `lambda`, `define`, `if` or `set!`, the rest of the form
 * is handed to the matching special-form parser. Otherwise operands are
 * parsed until the closing ')'.
 *
 * @return A newly allocated node for the form, or nullptr on failure with the
 *         cause appended to `errors`.
 */
AST::Node *Parser::parseProcedureCall()
{
    // Consume the opening parenthesis.
    if (text.empty() || text.front() != '(') {
        errors.push_back(ExpectedProcedureCallOpen);
        return nullptr;
    }
    text.pop_front();

    // Parse the callee; it may be any expression, not just an identifier.
    auto callee = parseExpression();
    if (callee == nullptr)
        return nullptr;

    // Check for special procedure calls.
    if (auto id = dynamic_cast<AST::Identifier *>(callee); id) {
        AST::Node *(Parser::*special)() = nullptr;

        if (id->name == "lambda")
            special = &Parser::parseLambdaExpression;
        else if (id->name == "define")
            special = &Parser::parseDefinition;
        else if (id->name == "if")
            special = &Parser::parseConditional;
        else if (id->name == "set!")
            special = &Parser::parseAssignment;

        if (special != nullptr) {
            // The keyword node only selected the parser and is not part of
            // the resulting AST, so release it instead of leaking it.
            delete id;
            return (this->*special)();
        }
    }

    // This is a regular procedure call.
    // Build the argument list:
    std::vector<AST::Node *> args;

    consumeWhitespace();
    // Stop at end of input too, so an unterminated call is reported below
    // rather than reading past the end of `text`.
    while (!text.empty() && text.front() != ')') {
        auto node = parseExpression();
        if (node == nullptr) {
            // NOTE(review): `callee` and the operands parsed so far are not
            // released on this path; deleting through AST::Node* would need a
            // virtual destructor -- confirm before adding cleanup.
            errors.push_back(InvalidOperand);
            return nullptr;
        }

        args.push_back(node);
        consumeWhitespace();
    }

    if (text.empty()) {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }

    // Consume the closing parenthesis.
    text.pop_front();

    auto pc = new AST::ProcedureCall;
    pc->callee = callee;
    pc->operands = args;

    return pc;
}
|
|
|
|
|
|
|
|
/**
 * @brief Parses the remainder of an `if` form.
 *
 * Called by parseProcedureCall() after "(if" has been consumed. Expects three
 * whitespace-separated expressions (condition, then-branch, else-branch)
 * followed by the closing ')'.
 *
 * @return A newly allocated AST::Conditional, or nullptr on failure with
 *         InvalidCondition, InvalidThenBranch, InvalidElseBranch or
 *         ExpectedProcedureCallClose appended to `errors`.
 */
AST::Node *Parser::parseConditional()
{
    consumeWhitespace();
    auto cond = parseExpression();
    if (cond == nullptr) {
        errors.push_back(InvalidCondition);
        return nullptr;
    }

    consumeWhitespace();
    auto ift = parseExpression();
    if (ift == nullptr) {
        errors.push_back(InvalidThenBranch);
        return nullptr;
    }

    consumeWhitespace();
    auto iff = parseExpression();
    if (iff == nullptr) {
        // Report the branch that actually failed, not the "then" branch.
        errors.push_back(InvalidElseBranch);
        return nullptr;
    }

    consumeWhitespace();
    // An exhausted input counts as a missing ')'; front() must not be called
    // on an empty container.
    if (text.empty() || text.front() != ')') {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto node = new AST::Conditional;
    node->cond = cond;
    node->iftrue = ift;
    node->iffalse = iff;
    return node;
}
|
|
|
|
|
|
|
|
/**
 * @brief Parses the remainder of a `define` form.
 *
 * Called by parseProcedureCall() after "(define" has been consumed. Expects
 * an identifier, a value expression and the closing ')'.
 *
 * @return A newly allocated AST::Definition, or nullptr on failure with
 *         ExpectedIdentifier, InvalidInitializer or
 *         ExpectedProcedureCallClose appended to `errors`.
 */
AST::Node *Parser::parseDefinition()
{
    consumeWhitespace();
    auto ident = consumeIdentifier();
    if (!ident) {
        errors.push_back(ExpectedIdentifier);
        return nullptr;
    }

    consumeWhitespace();
    auto val = parseExpression();
    if (val == nullptr) {
        errors.push_back(InvalidInitializer);
        return nullptr;
    }

    consumeWhitespace();
    // An exhausted input counts as a missing ')'; front() must not be called
    // on an empty container.
    if (text.empty() || text.front() != ')') {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto id = new AST::Identifier;
    id->name = *ident;

    auto def = new AST::Definition;
    def->ident = id;
    def->value = val;
    return def;
}
|
|
|
|
|
|
|
|
/**
 * @brief Parses the remainder of a `set!` form.
 *
 * Called by parseProcedureCall() after "(set!" has been consumed. Expects an
 * identifier, a value expression and the closing ')'.
 *
 * @return A newly allocated AST::Assignment, or nullptr on failure with
 *         ExpectedIdentifier, InvalidAssignValue or
 *         ExpectedProcedureCallClose appended to `errors`.
 */
AST::Node *Parser::parseAssignment()
{
    consumeWhitespace();
    auto ident = consumeIdentifier();
    if (!ident) {
        errors.push_back(ExpectedIdentifier);
        return nullptr;
    }

    consumeWhitespace();
    auto val = parseExpression();
    if (val == nullptr) {
        errors.push_back(InvalidAssignValue);
        return nullptr;
    }

    consumeWhitespace();
    // An exhausted input counts as a missing ')'; front() must not be called
    // on an empty container.
    if (text.empty() || text.front() != ')') {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto id = new AST::Identifier;
    id->name = *ident;

    auto def = new AST::Assignment;
    def->ident = id;
    def->value = val;
    return def;
}
|
|
|
|
|
|
|
|
/**
 * @brief Parses the remainder of a `lambda` form.
 *
 * Called by parseProcedureCall() after "(lambda" has been consumed. Expects a
 * parenthesised list of argument identifiers, then zero or more body
 * expressions, then the closing ')'.
 *
 * @return A newly allocated AST::LambdaExpression, or nullptr on failure with
 *         ExpectedArgumentList, InvalidArgumentName, InvalidLambdaBody or
 *         ExpectedProcedureCallClose appended to `errors`.
 */
AST::Node *Parser::parseLambdaExpression()
{
    // Consume beginning of argument list.
    consumeWhitespace();
    if (text.empty() || text.front() != '(') {
        errors.push_back(ExpectedArgumentList);
        return nullptr;
    }
    text.pop_front();

    // Consume argument list:
    std::vector<AST::Identifier *> args;

    // Also stop at end of input so an unterminated list is reported below
    // rather than reading past the end of `text`.
    while (!text.empty() && text.front() != ')') {
        auto arg = consumeIdentifier();
        if (!arg) {
            errors.push_back(InvalidArgumentName);
            return nullptr;
        }

        auto ident = new AST::Identifier;
        ident->name = *arg;
        args.push_back(ident);

        consumeWhitespace();
    }

    // The argument list must be closed before the body can start.
    if (text.empty()) {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    // Consume lambda body:
    std::vector<AST::Node *> body;

    consumeWhitespace();
    while (!text.empty() && text.front() != ')') {
        auto exp = parseExpression();

        // A null node is never stored in the body, even if no error was
        // recorded for it.
        if (exp == nullptr || !errors.empty()) {
            errors.push_back(InvalidLambdaBody);
            return nullptr;
        }

        body.push_back(exp);
        consumeWhitespace();
    }

    if (text.empty()) {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }

    // Consume the closing parenthesis of the lambda form.
    text.pop_front();

    auto le = new AST::LambdaExpression;
    le->operands = args;
    le->body = body;

    return le;
}
|
|
|
|
|
|
|
|
std::string Parser::describeErrors() noexcept
|
|
|
|
{
|
|
|
|
std::string ret;
|
|
|
|
|
|
|
|
for (auto& err : errors) {
|
|
|
|
switch (err) {
|
|
|
|
case ExpectedProcedureCallOpen:
|
|
|
|
ret += "Expected opening \'(\' for procedure call.\n";
|
|
|
|
break;
|
|
|
|
case ExpectedIdentifier:
|
|
|
|
ret += "Expected a valid identifier.\n";
|
|
|
|
break;
|
|
|
|
case ExpectedProcedureCallClose:
|
|
|
|
ret += "Expected closing \')\' for procedure call.\n";
|
|
|
|
break;
|
|
|
|
case ExpectedArgumentList:
|
|
|
|
ret += "Expected beginning of argument list.\n";
|
|
|
|
break;
|
|
|
|
case UnknownIdentifier:
|
|
|
|
ret += "Given identifier is not valid.\n";
|
|
|
|
break;
|
|
|
|
case InvalidExpression:
|
|
|
|
ret += "Expected a valid expression.\n";
|
|
|
|
break;
|
|
|
|
case InvalidOperand:
|
|
|
|
ret += "Given invalid argument or operand.\n";
|
|
|
|
break;
|
|
|
|
case InvalidCondition:
|
|
|
|
ret += "Given invalid condition for conditional statement.\n";
|
|
|
|
break;
|
|
|
|
case InvalidThenBranch:
|
|
|
|
ret += "Given invalid \"then\" branch for conditional statement.\n";
|
|
|
|
break;
|
|
|
|
case InvalidElseBranch:
|
|
|
|
ret += "Given invalid \"else\" branch for conditional statement.\n";
|
|
|
|
break;
|
|
|
|
case InvalidInitializer:
|
|
|
|
ret += "Given invalid initializer for a declaration.\n";
|
|
|
|
break;
|
|
|
|
case InvalidAssignValue:
|
|
|
|
ret += "Given invalid value for an assignment.\n";
|
|
|
|
break;
|
|
|
|
case InvalidArgumentName:
|
|
|
|
ret += "Given invalid name for an argument.\n";
|
|
|
|
break;
|
|
|
|
case InvalidLambdaBody:
|
|
|
|
ret += "Lambda body is invalid.\n";
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|