You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

404 lines
9.9 KiB
C++

/**
* lisp-compiler: Compiles LISP using LLVM.
* Copyright (C) 2022 Clyne Sullivan
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* @file parser.cpp
* @brief Source code parser to produce AST.
*/
#include "parser.hpp"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <vector>
/**
 * Appends source text to the end of the parser's unparsed input.
 * @param str Characters to queue; an empty string adds nothing.
 */
void Parser::addString(const std::string& str)
{
    // Queue each character in order behind whatever is already waiting.
    for (const char ch : str)
        text.push_back(ch);
}
/**
 * Discards whitespace characters from the front of the unparsed text.
 * Stops at the first non-whitespace character, or when the text runs out.
 */
void Parser::consumeWhitespace() noexcept
{
    // Calling front() on an empty container is undefined, so check for
    // remaining text first. The cast keeps isspace() well-defined for
    // characters whose value is negative as a plain char.
    while (!text.empty() &&
           std::isspace(static_cast<unsigned char>(text.front())))
        text.pop_front();
}
/**
 * Consumes an identifier from the front of the unparsed text.
 * An identifier starts with a letter and continues with letters, digits,
 * or '!' characters.
 * @return The identifier's name, or an empty optional (with nothing
 *         consumed) if the text does not begin with a letter.
 */
std::optional<std::string> Parser::consumeIdentifier() noexcept
{
    // TODO Accept all valid identifiers according to R7RS-small.

    // The <cctype> classifiers need an unsigned char value, and front() must
    // never be called once the text is exhausted.
    const auto isStart = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };
    const auto isContinuation = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '!';
    };

    if (text.empty() || !isStart(text.front()))
        return {};

    std::string ret;
    do {
        ret += text.front();
        text.pop_front();
    } while (!text.empty() && isContinuation(text.front()));

    return ret;
}
/**
 * Consumes a numeric literal (a run of digits and '.' characters) from the
 * front of the unparsed text.
 * @return An int when the run contains no '.', a double otherwise. Returns
 *         an empty optional when the text does not start with a digit or
 *         '.', or when the consumed run is not a well-formed decimal
 *         (e.g. "." or "1.2.3"); in the latter case the run has already
 *         been removed from the text.
 */
std::optional<std::variant<int, double>> Parser::consumeLiteralNumber()
{
    std::string digits;

    // Guard against exhausted input before every front() call.
    while (!text.empty() &&
           (std::isdigit(static_cast<unsigned char>(text.front())) ||
            text.front() == '.')) {
        digits += text.front();
        text.pop_front();
    }

    if (digits.empty())
        return {};

    if (digits.find('.') == std::string::npos) {
        // Base 10 is explicit: base 0 would read a leading zero as an octal
        // prefix, turning "010" into 8 and "08" into 0.
        const long value = std::strtol(digits.c_str(), nullptr, 10);
        // TODO Error check (values outside the range of int are not detected)
        return static_cast<int>(value);
    } else {
        char *end = nullptr;
        const double value = std::strtod(digits.c_str(), &end);
        // Reject runs that strtod could not convert in full, such as a lone
        // "." or a literal with more than one decimal point.
        if (end != digits.c_str() + digits.size())
            return {};
        // TODO Error check (out-of-range values are not detected)
        return value;
    }
}
/**
 * Parses all of the pending text into a sequence of top-level AST nodes.
 * Any previously recorded errors are cleared first.
 * @return The parsed nodes in source order, or an empty deque if any error
 *         was recorded (see describeErrors()).
 */
std::deque<AST::Node *> Parser::parse()
{
    std::deque<AST::Node *> ret;

    errors.clear();

    while (!text.empty()) {
        consumeWhitespace();

        // Whitespace after the final form leaves nothing more to parse;
        // without this check the procedure-call parser would be invoked on
        // empty text.
        if (text.empty())
            break;

        // At the top-level, there will only be procedure calls.
        auto node = parseProcedureCall();
        if (!errors.empty() || node == nullptr) {
            // NOTE(review): nodes already collected in `ret` are dropped
            // here without being freed -- confirm who owns AST nodes.
            return {};
        }

        ret.push_back(node);
    }

    return ret;
}
/**
 * Parses a single expression at the front of the unparsed text: a
 * parenthesized form, an identifier, or a numeric literal.
 * @return The new AST node, or nullptr after recording an error
 *         (InvalidExpression when the text matches none of these forms,
 *         including when the text is exhausted).
 */
AST::Node *Parser::parseExpression()
{
    // Exhausted input falls through to the error below; front() must not be
    // called on empty text.
    if (!text.empty()) {
        if (text.front() == '(')
            return parseProcedureCall();

        if (auto id = consumeIdentifier(); id) {
            auto ident = new AST::Identifier;
            ident->name = *id;
            return ident;
        }

        if (auto d = consumeLiteralNumber(); d) {
            auto lit = new AST::Literal;
            lit->type = AST::Literal::Number;
            lit->value = *d;
            return lit;
        }
    }

    errors.push_back(InvalidExpression);
    return nullptr;
}
/**
 * Parses a parenthesized form: "(" callee operand... ")".
 * When the callee is the identifier lambda, define, if, or set!, the rest of
 * the form is handed to the matching special-form parser instead.
 * @return The resulting AST node, or nullptr after recording an error.
 */
AST::Node *Parser::parseProcedureCall()
{
    // Consume the opening parenthesis. Exhausted input is reported the same
    // way as a wrong character; front() must not be called on empty text.
    if (text.empty() || text.front() != '(') {
        errors.push_back(ExpectedProcedureCallOpen);
        return nullptr;
    }
    text.pop_front();

    // Consume the callee expression.
    auto ident = parseExpression();
    if (ident == nullptr)
        return nullptr;

    // Check for special procedure calls.
    // NOTE(review): the keyword's Identifier node is not used (or freed)
    // once a special form is recognized -- confirm AST ownership.
    if (auto id = dynamic_cast<AST::Identifier *>(ident); id) {
        if (id->name == "lambda")
            return parseLambdaExpression();
        else if (id->name == "define")
            return parseDefinition();
        else if (id->name == "if")
            return parseConditional();
        else if (id->name == "set!")
            return parseAssignment();
    }

    // This is a regular procedure call.
    // Build the argument list, stopping at ')' or at the end of the input:
    std::vector<AST::Node *> args;
    consumeWhitespace();
    while (!text.empty() && text.front() != ')') {
        auto node = parseExpression();
        if (node == nullptr) {
            errors.push_back(InvalidOperand);
            return nullptr;
        }

        args.push_back(node);
        consumeWhitespace();
    }

    // The loop above only stops on ')' or on exhausted input; the latter
    // means the call was never closed.
    if (text.empty()) {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }

    text.pop_front();

    auto pc = new AST::ProcedureCall;
    pc->callee = ident;
    pc->operands = args;
    return pc;
}
/**
 * Parses the remainder of an "if" form, after "(if" has been consumed:
 * a condition, a "then" expression, an "else" expression, and the closing
 * parenthesis.
 * @return A new AST::Conditional, or nullptr after recording an error.
 */
AST::Node *Parser::parseConditional()
{
    consumeWhitespace();
    auto cond = parseExpression();
    if (cond == nullptr) {
        errors.push_back(InvalidCondition);
        return nullptr;
    }

    consumeWhitespace();
    auto ift = parseExpression();
    if (ift == nullptr) {
        errors.push_back(InvalidThenBranch);
        return nullptr;
    }

    consumeWhitespace();
    auto iff = parseExpression();
    if (iff == nullptr) {
        // Report the branch that actually failed: this is the "else" arm.
        errors.push_back(InvalidElseBranch);
        return nullptr;
    }

    consumeWhitespace();
    // Exhausted input counts as a missing ')'; front() must not be called on
    // empty text.
    if (text.empty() || text.front() != ')') {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto node = new AST::Conditional;
    node->cond = cond;
    node->iftrue = ift;
    node->iffalse = iff;
    return node;
}
/**
 * Parses the remainder of a "define" form, after "(define" has been
 * consumed: an identifier, an initializer expression, and the closing
 * parenthesis.
 * @return A new AST::Definition, or nullptr after recording an error.
 */
AST::Node *Parser::parseDefinition()
{
    consumeWhitespace();
    auto ident = consumeIdentifier();
    if (!ident) {
        errors.push_back(ExpectedIdentifier);
        return nullptr;
    }

    consumeWhitespace();
    auto val = parseExpression();
    if (val == nullptr) {
        errors.push_back(InvalidInitializer);
        return nullptr;
    }

    consumeWhitespace();
    // Exhausted input counts as a missing ')'; front() must not be called on
    // empty text.
    if (text.empty() || text.front() != ')') {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto id = new AST::Identifier;
    id->name = *ident;

    auto def = new AST::Definition;
    def->ident = id;
    def->value = val;
    return def;
}
/**
 * Parses the remainder of a "set!" form, after "(set!" has been consumed:
 * an identifier, a value expression, and the closing parenthesis.
 * @return A new AST::Assignment, or nullptr after recording an error.
 */
AST::Node *Parser::parseAssignment()
{
    consumeWhitespace();
    auto ident = consumeIdentifier();
    if (!ident) {
        errors.push_back(ExpectedIdentifier);
        return nullptr;
    }

    consumeWhitespace();
    auto val = parseExpression();
    if (val == nullptr) {
        errors.push_back(InvalidAssignValue);
        return nullptr;
    }

    consumeWhitespace();
    // Exhausted input counts as a missing ')'; front() must not be called on
    // empty text.
    if (text.empty() || text.front() != ')') {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto id = new AST::Identifier;
    id->name = *ident;

    auto assign = new AST::Assignment;
    assign->ident = id;
    assign->value = val;
    return assign;
}
/**
 * Parses the remainder of a "lambda" form, after "(lambda" has been
 * consumed: a parenthesized list of argument names, zero or more body
 * expressions, and the closing parenthesis.
 * @return A new AST::LambdaExpression, or nullptr after recording an error.
 */
AST::Node *Parser::parseLambdaExpression()
{
    // Consume beginning of argument list. Exhausted input is reported the
    // same way as a wrong character; front() must not be called on empty
    // text.
    consumeWhitespace();
    if (text.empty() || text.front() != '(') {
        errors.push_back(ExpectedArgumentList);
        return nullptr;
    }
    text.pop_front();

    // Consume argument list:
    // NOTE(review): Identifier nodes collected here are not freed on the
    // error paths below -- confirm AST ownership.
    std::vector<AST::Identifier *> args;
    while (!text.empty() && text.front() != ')') {
        auto arg = consumeIdentifier();
        if (!arg) {
            errors.push_back(InvalidArgumentName);
            return nullptr;
        }

        auto ident = new AST::Identifier;
        ident->name = *arg;
        args.push_back(ident);
        consumeWhitespace();
    }

    // The loop stops on ')' or on exhausted input; only the former may be
    // popped.
    if (text.empty()) {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    // Consume lambda body:
    std::vector<AST::Node *> body;
    consumeWhitespace();
    while (!text.empty() && text.front() != ')') {
        auto exp = parseExpression();
        if (exp == nullptr || !errors.empty()) {
            errors.push_back(InvalidLambdaBody);
            return nullptr;
        }

        body.push_back(exp);
        consumeWhitespace();
    }

    // Running out of input here means the lambda form was never closed.
    if (text.empty()) {
        errors.push_back(ExpectedProcedureCallClose);
        return nullptr;
    }
    text.pop_front();

    auto le = new AST::LambdaExpression;
    le->operands = args;
    le->body = body;
    return le;
}
/**
 * Builds a human-readable report of the recorded parse errors.
 * @return One newline-terminated message per recognized error, in the order
 *         the errors were recorded. Unrecognized error codes add nothing.
 */
std::string Parser::describeErrors() noexcept
{
    std::string report;

    for (const auto& code : errors) {
        // Select the message for this code; stays null for unknown codes.
        const char *message = nullptr;

        switch (code) {
        case ExpectedProcedureCallOpen:
            message = "Expected opening \'(\' for procedure call.\n";
            break;
        case ExpectedIdentifier:
            message = "Expected a valid identifier.\n";
            break;
        case ExpectedProcedureCallClose:
            message = "Expected closing \')\' for procedure call.\n";
            break;
        case ExpectedArgumentList:
            message = "Expected beginning of argument list.\n";
            break;
        case UnknownIdentifier:
            message = "Given identifier is not valid.\n";
            break;
        case InvalidExpression:
            message = "Expected a valid expression.\n";
            break;
        case InvalidOperand:
            message = "Given invalid argument or operand.\n";
            break;
        case InvalidCondition:
            message = "Given invalid condition for conditional statement.\n";
            break;
        case InvalidThenBranch:
            message = "Given invalid \"then\" branch for conditional statement.\n";
            break;
        case InvalidElseBranch:
            message = "Given invalid \"else\" branch for conditional statement.\n";
            break;
        case InvalidInitializer:
            message = "Given invalid initializer for a declaration.\n";
            break;
        case InvalidAssignValue:
            message = "Given invalid value for an assignment.\n";
            break;
        case InvalidArgumentName:
            message = "Given invalid name for an argument.\n";
            break;
        case InvalidLambdaBody:
            message = "Lambda body is invalid.\n";
            break;
        default:
            break;
        }

        if (message != nullptr)
            report += message;
    }

    return report;
}