Select Git revision
PugsParser.cpp
-
Stéphane Del Pino authoredStéphane Del Pino authored
PugsParser.cpp 17.34 KiB
#include <PugsParser.hpp>
#include <PugsAssert.hpp>
#include <fstream>
#include <iostream>
#include <unordered_map>
#include <variant>
#include <rang.hpp>
#include <pegtl/analyze.hpp>
#include <pegtl/contrib/parse_tree.hpp>
#include <pegtl/contrib/parse_tree_to_dot.hpp>
#include <ASTNode.hpp>
#include <ASTBuilder.hpp>
#include <PEGGrammar.hpp>
#include <SymbolTable.hpp>
#include <EscapedString.hpp>
#include <ASTNodeExpressionBuilder.hpp>
#include <ASTSymbolTableBuilder.hpp>
#include <ASTPrinter.hpp>
namespace language
{
namespace internal
{
void
print_dot(std::ostream& os, const Node& n)
{
if (n.is_root()) {
os << " x" << &n << " [ label=\"root \\n" << dataTypeName(n.m_data_type) << "\" ]\n";
} else {
if (n.has_content()) {
os << " x" << &n << " [ label=\"" << n.name() << "\\n"
<< n.string_view() << "\\n"
<< dataTypeName(n.m_data_type) << "\" ]\n";
} else {
os << " x" << &n << " [ label=\"" << n.name() << "\\n" << dataTypeName(n.m_data_type) << "\" ]\n";
}
}
if (!n.children.empty()) {
os << " x" << &n << " -> { ";
for (auto& child : n.children) {
os << "x" << child.get() << ((child == n.children.back()) ? " }\n" : ", ");
}
for (auto& child : n.children) {
print_dot(os, *child);
}
}
}
} // namespace internal
void
print_dot(std::ostream& os, const Node& n)
{
Assert(n.is_root());
os << "digraph parse_tree\n{\n";
internal::print_dot(os, n);
os << "}\n";
}
// namespace internal
// {
// void
// build_symbol_table_and_check_declarations(Node& n, std::shared_ptr<SymbolTable>& symbol_table)
// {
// if (n.is<language::bloc>() or (n.is<language::for_statement>())) {
// if (!n.children.empty()) {
// std::shared_ptr bloc_symbol_table = std::make_shared<SymbolTable>(symbol_table);
// n.m_symbol_table = bloc_symbol_table;
// for (auto& child : n.children) {
// build_symbol_table_and_check_declarations(*child, bloc_symbol_table);
// }
// }
// } else {
// n.m_symbol_table = symbol_table;
// if (n.has_content()) {
// if (n.is<language::declaration>()) {
// const std::string& symbol = n.children[1]->string();
// auto [i_symbol, success] = symbol_table->add(symbol);
// if (not success) {
// std::ostringstream error_message;
// error_message << "symbol '" << rang::fg::red << symbol << rang::fg::reset << '\'' << " was already
// defined!"; throw parse_error(error_message.str(), std::vector{n.begin()});
// }
// } else if (n.is<language::name>()) {
// auto [i_symbol, found] = symbol_table->find(n.string());
// if (not found) {
// std::ostringstream error_message;
// error_message << "undefined symbol '" << rang::fg::red << n.string() << rang::fg::reset << '\'';
// throw parse_error(error_message.str(), std::vector{n.begin()});
// }
// }
// }
// for (auto& child : n.children) {
// build_symbol_table_and_check_declarations(*child, symbol_table);
// }
// }
// }
// } // namespace internal
// void
// build_symbol_table_and_check_declarations(Node& n)
// {
// Assert(n.is_root());
// std::shared_ptr symbol_table = std::make_shared<SymbolTable>();
// n.m_symbol_table = symbol_table;
// internal::build_symbol_table_and_check_declarations(n, symbol_table);
// std::cout << " - checked symbols declaration\n";
// }
namespace internal
{
void
check_symbol_initialization(const Node& n, std::shared_ptr<SymbolTable>& symbol_table)
{
if (n.is<language::bloc>() or n.is<language::for_statement>()) {
if (!n.children.empty()) {
std::shared_ptr bloc_symbol_table = std::make_shared<SymbolTable>(symbol_table);
for (auto& child : n.children) {
check_symbol_initialization(*child, bloc_symbol_table);
}
}
} else {
if (n.is<language::declaration>()) {
const std::string& symbol = n.children[1]->string();
auto [i_symbol, success] = symbol_table->add(symbol);
Assert(success, "unexpected error, should have been detected through declaration checking");
if (n.children.size() == 3) {
check_symbol_initialization(*n.children[2], symbol_table);
i_symbol->second.setIsInitialized();
}
} else if (n.is<language::eq_op>()) {
// first checks for right hand side
check_symbol_initialization(*n.children[1], symbol_table);
// then marks left hand side as initialized
const std::string& symbol = n.children[0]->string();
auto [i_symbol, found] = symbol_table->find(symbol);
Assert(found, "unexpected error, should have been detected through declaration checking");
i_symbol->second.setIsInitialized();
} else if (n.is<language::name>()) {
auto [i_symbol, found] = symbol_table->find(n.string());
Assert(found, "unexpected error, should have been detected through declaration checking");
if (not i_symbol->second.isInitialized()) {
std::ostringstream error_message;
error_message << "uninitialized symbol '" << rang::fg::red << n.string() << rang::fg::reset << '\'';
throw parse_error(error_message.str(), std::vector{n.begin()});
}
}
if ((not n.is<language::declaration>()) and (not n.is<language::eq_op>())) {
for (auto& child : n.children) {
check_symbol_initialization(*child, symbol_table);
}
}
}
}
} // namespace internal
void
check_symbol_initialization(const Node& n)
{
std::cerr << rang::fgB::yellow << "warning:" << rang::fg::reset
<< " symbol initialization checking not finished"
"if and loops statements are not correctly evaluated\n";
Assert(n.is_root());
std::shared_ptr symbol_table = std::make_shared<SymbolTable>();
internal::check_symbol_initialization(n, symbol_table);
std::cout << " - checked symbols initialization\n";
}
namespace internal
{
void
build_node_data_types(Node& n)
{
if (n.is<language::bloc>() or n.is<for_statement>()) {
if (!n.children.empty()) {
for (auto& child : n.children) {
build_node_data_types(*child);
}
}
n.m_data_type = DataType::void_t;
} else {
if (n.has_content()) {
if (n.is<language::true_kw>() or n.is<language::false_kw>() or n.is<language::do_kw>()) {
n.m_data_type = DataType::bool_t;
} else if (n.is<language::real>()) {
n.m_data_type = DataType::double_t;
} else if (n.is<language::integer>()) {
n.m_data_type = DataType::int_t;
} else if (n.is<language::literal>()) {
n.m_data_type = DataType::string_t;
} else if (n.is<language::cout_kw>() or n.is<language::cerr_kw>() or n.is<language::clog_kw>()) {
n.m_data_type = DataType::void_t;
} else if (n.is<language::declaration>()) {
auto& type_node = *(n.children[0]);
DataType data_type{DataType::undefined_t};
if (type_node.is<language::B_set>()) {
data_type = DataType::bool_t;
} else if (type_node.is<language::Z_set>()) {
data_type = DataType::int_t;
} else if (type_node.is<language::N_set>()) {
data_type = DataType::unsigned_int_t;
} else if (type_node.is<language::R_set>()) {
data_type = DataType::double_t;
} else if (type_node.is<language::string_type>()) {
data_type = DataType::string_t;
}
if (data_type == DataType::undefined_t) {
throw parse_error("unexpected error: invalid datatype", type_node.begin());
}
type_node.m_data_type = DataType::void_t;
n.children[1]->m_data_type = data_type;
const std::string& symbol = n.children[1]->string();
std::shared_ptr<SymbolTable>& symbol_table = n.m_symbol_table;
auto [i_symbol, found] = symbol_table->find(symbol);
Assert(found);
i_symbol->second.setDataType(data_type);
n.m_data_type = data_type;
} else if (n.is<language::name>()) {
std::shared_ptr<SymbolTable>& symbol_table = n.m_symbol_table;
auto [i_symbol, found] = symbol_table->find(n.string());
Assert(found);
n.m_data_type = i_symbol->second.dataType();
}
}
for (auto& child : n.children) {
build_node_data_types(*child);
}
if (n.is<language::break_kw>() or n.is<language::continue_kw>()) {
n.m_data_type = DataType::void_t;
} else if (n.is<language::eq_op>() or n.is<language::multiplyeq_op>() or n.is<language::divideeq_op>() or
n.is<language::pluseq_op>() or n.is<language::minuseq_op>() or n.is<language::bit_andeq_op>() or
n.is<language::bit_xoreq_op>() or n.is<language::bit_oreq_op>()) {
n.m_data_type = n.children[0]->m_data_type;
} else if (n.is<language::for_statement>()) {
n.m_data_type = DataType::void_t;
} else if (n.is<language::for_post>() or n.is<language::for_init>() or n.is<language::for_statement_bloc>()) {
n.m_data_type = DataType::void_t;
} else if (n.is<language::for_test>()) {
n.m_data_type = DataType::bool_t;
} else if (n.is<language::statement_bloc>()) {
n.m_data_type = DataType::void_t;
} else if (n.is<language::if_statement>() or n.is<language::while_statement>()) {
n.m_data_type = DataType::void_t;
if ((n.children[0]->m_data_type > DataType::double_t) or (n.children[0]->m_data_type < DataType::bool_t)) {
const DataType type_0 = n.children[0]->m_data_type;
std::ostringstream message;
message << "Cannot convert data type to boolean value\n"
<< "note: incompatible operand '" << n.children[0]->string() << " of type ' (" << dataTypeName(type_0)
<< ')' << std::ends;
throw parse_error(message.str(), n.children[0]->begin());
}
} else if (n.is<language::do_while_statement>()) {
n.m_data_type = DataType::void_t;
if ((n.children[1]->m_data_type > DataType::double_t) or (n.children[1]->m_data_type < DataType::bool_t)) {
const DataType type_0 = n.children[1]->m_data_type;
std::ostringstream message;
message << "Cannot convert data type to boolean value\n"
<< "note: incompatible operand '" << n.children[1]->string() << " of type ' (" << dataTypeName(type_0)
<< ')' << std::ends;
throw parse_error(message.str(), n.children[1]->begin());
}
} else if (n.is<language::unary_not>() or n.is<language::lesser_op>() or n.is<language::lesser_or_eq_op>() or
n.is<language::greater_op>() or n.is<language::greater_or_eq_op>() or n.is<language::eqeq_op>() or
n.is<language::not_eq_op>() or n.is<language::and_op>() or n.is<language::or_op>() or
n.is<language::xor_op>() or n.is<language::bitand_op>() or n.is<language::bitor_op>()) {
n.m_data_type = DataType::bool_t;
} else if (n.is<language::unary_minus>() or n.is<language::unary_plus>() or n.is<language::unary_plusplus>() or
n.is<language::unary_minusminus>()) {
n.m_data_type = n.children[0]->m_data_type;
} else if (n.is<language::plus_op>() or n.is<language::minus_op>() or n.is<language::multiply_op>() or
n.is<language::divide_op>()) {
const DataType type_0 = n.children[0]->m_data_type;
const DataType type_1 = n.children[1]->m_data_type;
n.m_data_type = dataTypePromotion(type_0, type_1);
if (n.m_data_type == DataType::undefined_t) {
std::ostringstream message;
message << "undefined binary operator\n"
<< "note: incompatible operand types " << n.children[0]->string() << " (" << dataTypeName(type_0)
<< ") and " << n.children[1]->string() << " (" << dataTypeName(type_1) << ')' << std::ends;
throw parse_error(message.str(), n.begin());
}
}
}
}
} // namespace internal
void
build_node_data_types(Node& n)
{
Assert(n.is_root());
n.m_data_type = DataType::void_t;
internal::build_node_data_types(n);
std::cout << " - build node data types\n";
}
namespace internal
{
void
check_node_data_types(const Node& n)
{
if (n.m_data_type == DataType::undefined_t) {
throw parse_error("unexpected error: undefined datatype for AST node for " + n.name(), n.begin());
}
for (auto& child : n.children) {
check_node_data_types(*child);
}
}
} // namespace internal
void
check_node_data_types(const Node& n)
{
Assert(n.is_root());
internal::check_node_data_types(n);
std::cout << " - checked node data types\n";
}
namespace internal
{
void
build_node_values(Node& n, std::shared_ptr<SymbolTable>& symbol_table)
{
if (n.is<language::bloc>()) {
if (!n.children.empty()) {
std::shared_ptr bloc_symbol_table = std::make_shared<SymbolTable>(symbol_table);
for (auto& child : n.children) {
build_node_values(*child, bloc_symbol_table);
}
}
n.m_data_type = DataType::void_t;
} else {
for (auto& child : n.children) {
build_node_values(*child, symbol_table);
}
if (n.has_content()) {
if (n.is<language::real>()) {
std::stringstream ss(n.string());
double v;
ss >> v;
n.m_value = v;
} else if (n.is<language::integer>()) {
std::stringstream ss(n.string());
int64_t v;
ss >> v;
n.m_value = v;
} else if (n.is<language::literal>()) {
n.m_value = unescapeString(n.string());
} else if (n.is<language::for_test>()) {
// if AST contains a for_test statement, it means that no test were
// given to the for-loop, so its value is always true
n.m_value = true;
} else if (n.is<language::true_kw>()) {
n.m_value = true;
} else if (n.is<language::false_kw>()) {
n.m_value = false;
}
}
}
}
} // namespace internal
void
build_node_values(Node& n)
{
Assert(n.is_root());
n.m_data_type = DataType::void_t;
std::shared_ptr symbol_table = std::make_shared<SymbolTable>();
internal::build_node_values(n, symbol_table);
std::cout << " - build node data types\n";
}
namespace internal
{
void
check_break_or_continue_placement(const Node& n, bool is_inside_loop)
{
if (n.is<language::for_statement>() or n.is<language::do_while_statement>() or n.is<language::while_statement>()) {
for (auto& child : n.children) {
check_break_or_continue_placement(*child, true);
}
} else if (n.is<language::break_kw>() or n.is<language::continue_kw>()) {
if (not is_inside_loop) {
std::ostringstream error_message;
error_message << "unexpected '" << rang::fgB::red << n.string() << rang::fg::reset
<< "' outside of loop or switch statement";
throw parse_error(error_message.str(), std::vector{n.begin()});
}
} else {
for (auto& child : n.children) {
check_break_or_continue_placement(*child, is_inside_loop);
}
}
}
} // namespace internal
void
check_break_or_continue_placement(const Node& n)
{
Assert(n.is_root());
internal::check_break_or_continue_placement(n, false);
}
namespace internal
{
void
simplify_declarations(Node& n)
{
if (n.is<language::declaration>()) {
if (n.children.size() == 3) {
n.children[0] = std::move(n.children[1]);
n.children[1] = std::move(n.children[2]);
n.children.resize(2);
n.id = typeid(language::eq_op);
}
} else {
for (auto& child : n.children) {
simplify_declarations(*child);
}
}
}
} // namespace internal
void
simplify_declarations(Node& n)
{
Assert(n.is_root());
internal::simplify_declarations(n);
}
} // namespace language
void
parser(const std::string& filename)
{
const size_t grammar_issues = analyze<language::grammar>();
std::cout << rang::fgB::yellow << "grammar_issues=" << rang::fg::reset << grammar_issues << '\n';
std::cout << rang::style::bold << "Parsing file " << rang::style::reset << rang::style::underline << filename
<< rang::style::reset << " ...\n";
std::unique_ptr<language::Node> root_node;
read_input input(filename);
try {
root_node = ASTBuilder::build(input);
std::cout << " - AST is built ...... [done]\n";
language::ASTSymbolTableBuilder{*root_node};
// language::build_symbol_table_and_check_declarations(*root_node);
language::check_symbol_initialization(*root_node);
{
std::string dot_filename{"parse_tree.dot"};
std::ofstream fout(dot_filename);
language::print_dot(fout, *root_node);
std::cout << " AST dot file: " << dot_filename << '\n';
}
language::build_node_data_types(*root_node);
language::check_node_data_types(*root_node);
language::build_node_values(*root_node);
language::check_break_or_continue_placement(*root_node);
// optimizations
language::simplify_declarations(*root_node);
language::build_node_type(*root_node);
std::cout << language::ASTPrinter{*root_node} << '\n';
language::ExecUntilBreakOrContinue exec_all;
root_node->execute(exec_all);
std::cout << *(root_node->m_symbol_table) << '\n';
}
catch (const parse_error& e) {
const auto p = e.positions.front();
std::cerr << rang::style::bold << p.source << ':' << p.line << ':' << p.byte_in_line << ": " << rang::style::reset
<< rang::fgB::red << "error: " << rang::fg::reset << rang::style::bold << e.what() << rang::style::reset
<< '\n'
<< input.line_at(p) << '\n'
<< std::string(p.byte_in_line, ' ') << rang::fgB::yellow << '^' << rang::fg::reset << std::endl;
std::exit(1);
}
std::cout << "Parsed: " << filename << '\n';
}