From 8d3decb97c252bc7b0ba4b9b5def0b04dfcb8e00 Mon Sep 17 00:00:00 2001 From: Daniel Karbach Date: Sun, 26 Aug 2012 21:43:34 +0200 Subject: [PATCH] made parsing exceptions a little more informative --- src/loader/Parser.cpp | 87 ++++++++++++++++++++++------------------ src/loader/Parser.h | 17 ++++++-- src/loader/Tokenizer.cpp | 25 +++++++++--- src/loader/Tokenizer.h | 11 ++++- 4 files changed, 90 insertions(+), 50 deletions(-) diff --git a/src/loader/Parser.cpp b/src/loader/Parser.cpp index 2cf1a34..cbb006c 100644 --- a/src/loader/Parser.cpp +++ b/src/loader/Parser.cpp @@ -24,7 +24,7 @@ void Parser::Parse() { } void Parser::ParseStatement() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); switch (t.type) { case Tokenizer::Token::KEYWORD_EXPORT: ParseExportDirective(); @@ -40,12 +40,20 @@ void Parser::ParseStatement() { } break; default: - throw ParseError(string("unexpected token ") + TokenTypeToString(t.type)); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type)); + } +} + +Tokenizer::Token Parser::GetToken() { + try { + return tok.GetNext(); + } catch (Tokenizer::LexerError &e) { + throw Error(file, e.Line(), e.what()); } } void Parser::ParseExportDirective() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); if (t.type != Tokenizer::Token::IDENTIFIER) { tok.Putback(t); Declaration *decl(ProbeDefinition()); @@ -56,10 +64,9 @@ void Parser::ParseExportDirective() { } void Parser::ParseIncludeDirective() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::STRING); - ifstream file(t.str.c_str()); // TODO: resolve path name - Parser sub(file, product); + Parser sub(t.str.c_str(), product); // TODO: resolve path name sub.Parse(); } @@ -67,7 +74,7 @@ Declaration *Parser::ProbeDefinition() { string typeName(ParseTypeName()); string identifier(ParseIdentifier()); - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token 
t(GetToken()); tok.Putback(t); if (BeginOfPropertyList(t)) { PropertyList *propertyList(ParsePropertyList()); @@ -108,7 +115,7 @@ Definition *Parser::ParseDefinition() { string typeName(ParseTypeName()); string identifier(ParseIdentifier()); - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); tok.Putback(t); if (BeginOfPropertyList(t)) { PropertyList *propertyList(ParsePropertyList()); @@ -121,41 +128,41 @@ Definition *Parser::ParseDefinition() { dfn->SetValue(literal); return dfn; } else { - throw ParseError(string("unexpected token ") + TokenTypeToString(t.type) + ", expected property-list or literal"); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type) + ", expected property-list or literal"); } } string Parser::ParseIdentifier() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::IDENTIFIER); return t.str; } string Parser::ParseTypeName() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::TYPE_NAME); return t.str; } PropertyList *Parser::ParsePropertyList() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::ANGLE_BRACKET_OPEN); auto_ptr<PropertyList> props(new PropertyList); while (t.type != Tokenizer::Token::ANGLE_BRACKET_CLOSE) { - Tokenizer::Token name(tok.GetNext()); + Tokenizer::Token name(GetToken()); AssertTokenType(name.type, Tokenizer::Token::IDENTIFIER); - t = tok.GetNext(); + t = GetToken(); AssertTokenType(t.type, Tokenizer::Token::COLON); Value *value(ParseValue()); props->SetProperty(name.str, value); - t = tok.GetNext(); + t = GetToken(); if (t.type != Tokenizer::Token::ANGLE_BRACKET_CLOSE && t.type != Tokenizer::Token::COMMA) { - throw ParseError(string("unexpected token ") + TokenTypeToString(t.type) + ", expected , or }"); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type) + ", 
expected , or }"); } } @@ -163,7 +170,7 @@ PropertyList *Parser::ParsePropertyList() { } Value *Parser::ParseValue() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); if (t.type == Tokenizer::Token::IDENTIFIER) { return new Value(t.str); } else if (BeginningOfLiteral(t)) { @@ -171,12 +178,12 @@ Value *Parser::ParseValue() { Literal *literal(ParseLiteral()); return new Value(literal); } else { - throw new ParseError(string("unexpected token ") + TokenTypeToString(t.type) + ", expected literal or identifier"); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type) + ", expected literal or identifier"); } } Literal *Parser::ParseLiteral() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); if (t.type == Tokenizer::Token::TYPE_NAME) { PropertyList *props(ParsePropertyList()); return new Literal(t.str, props); @@ -203,12 +210,12 @@ Literal *Parser::ParseLiteral() { throw std::logic_error("literal switch reached impossible default branch oO"); } } else { - throw new ParseError(string("unexpected token ") + TokenTypeToString(t.type) + ", expected type-name or primitive"); + throw new Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type) + ", expected type-name or primitive"); } } Literal *Parser::ParseArray() { - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::BRACKET_OPEN); vector<Value *> values; @@ -217,9 +224,9 @@ Literal *Parser::ParseArray() { Value *value(ParseValue()); values.push_back(value); - t = tok.GetNext(); + t = GetToken(); if (t.type != Tokenizer::Token::BRACKET_CLOSE && t.type != Tokenizer::Token::COMMA) { - throw ParseError(string("unexpected token ") + TokenTypeToString(t.type) + ", expected , or ]"); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type) + ", expected , or ]"); } } @@ -228,55 +235,55 @@ Literal *Parser::ParseArray() { Literal *Parser::ParseColor() 
{ string msg("error parsing color"); - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::PARENTHESIS_OPEN, msg); - Tokenizer::Token red(tok.GetNext()); + Tokenizer::Token red(GetToken()); AssertTokenType(red.type, Tokenizer::Token::NUMBER, "error parsing red component of color"); - t = tok.GetNext(); + t = GetToken(); AssertTokenType(t.type, Tokenizer::Token::COMMA, msg); - Tokenizer::Token green(tok.GetNext()); + Tokenizer::Token green(GetToken()); AssertTokenType(green.type, Tokenizer::Token::NUMBER, "error parsing green component of color"); - t = tok.GetNext(); + t = GetToken(); AssertTokenType(t.type, Tokenizer::Token::COMMA, msg); - Tokenizer::Token blue(tok.GetNext()); + Tokenizer::Token blue(GetToken()); AssertTokenType(blue.type, Tokenizer::Token::NUMBER, "error parsing blue component of color"); - t = tok.GetNext(); + t = GetToken(); if (t.type == Tokenizer::Token::BRACKET_CLOSE) { return new Literal(red.number, green.number, blue.number); } else if (t.type != Tokenizer::Token::COMMA) { - Tokenizer::Token alpha(tok.GetNext()); + Tokenizer::Token alpha(GetToken()); AssertTokenType(alpha.type, Tokenizer::Token::NUMBER, "error parsing alpha component of color"); - t = tok.GetNext(); + t = GetToken(); AssertTokenType(t.type, Tokenizer::Token::PARENTHESIS_CLOSE, msg); return new Literal(red.number, green.number, blue.number, alpha.number); } else { - throw ParseError(string("unexpected token ") + TokenTypeToString(t.type) + ", expected , or ]"); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(t.type) + ", expected , or ]"); } } Literal *Parser::ParseVector() { std::string msg("error parsing vector"); - Tokenizer::Token t(tok.GetNext()); + Tokenizer::Token t(GetToken()); AssertTokenType(t.type, Tokenizer::Token::CHEVRON_OPEN, msg); - Tokenizer::Token x(tok.GetNext()); + Tokenizer::Token x(GetToken()); AssertTokenType(x.type, Tokenizer::Token::NUMBER, "error parsing x 
component of vector"); - t = tok.GetNext(); + t = GetToken(); AssertTokenType(t.type, Tokenizer::Token::COMMA, msg); - Tokenizer::Token y(tok.GetNext()); + Tokenizer::Token y(GetToken()); AssertTokenType(y.type, Tokenizer::Token::NUMBER, "error parsing y component of vector"); - t = tok.GetNext(); + t = GetToken(); AssertTokenType(t.type, Tokenizer::Token::CHEVRON_CLOSE, msg); return new Literal(x.number, y.number); @@ -284,13 +291,13 @@ Literal *Parser::ParseVector() { void Parser::AssertTokenType(Tokenizer::Token::Type actual, Tokenizer::Token::Type expected) { if (expected != actual) { - throw ParseError(string("unexpected token ") + TokenTypeToString(actual) + ", expected " + TokenTypeToString(expected)); + throw Error(file, tok.Line(), string("unexpected token ") + TokenTypeToString(actual) + ", expected " + TokenTypeToString(expected)); } } void Parser::AssertTokenType(Tokenizer::Token::Type actual, Tokenizer::Token::Type expected, const string &msg) { if (expected != actual) { - throw ParseError(msg + ": unexpected token " + TokenTypeToString(actual) + ", expected " + TokenTypeToString(expected)); + throw Error(file, tok.Line(), msg + ": unexpected token " + TokenTypeToString(actual) + ", expected " + TokenTypeToString(expected)); } } diff --git a/src/loader/Parser.h b/src/loader/Parser.h index 923a424..b4f1b8d 100644 --- a/src/loader/Parser.h +++ b/src/loader/Parser.h @@ -11,6 +11,7 @@ #include "ParsedSource.h" #include "Tokenizer.h" +#include <fstream> #include #include #include @@ -25,7 +26,8 @@ class PropertyList; class Parser { public: - Parser(std::istream &in, ParsedSource &product) : tok(in), product(product) { } + Parser(const char *file, ParsedSource &product) + : file(file), in(file), tok(in), product(product) { } ~Parser() { } private: Parser(const Parser &); @@ -35,12 +37,19 @@ public: void Parse(); public: - class ParseError: public std::runtime_error { + class Error: public std::runtime_error { public: - explicit ParseError(const std::string &msg) : 
std::runtime_error(msg) { }; + Error(const char *file, int line, const std::string &msg) + : std::runtime_error(msg), file(file), line(line) { }; + const char *File() const { return file; } + int Line() const { return line; } + private: + const char *file; + int line; }; private: + Tokenizer::Token GetToken(); void ParseStatement(); void ParseExportDirective(); void ParseIncludeDirective(); @@ -65,6 +74,8 @@ private: bool BeginOfPropertyList(const Tokenizer::Token &) const; private: + const char *file; + std::ifstream in; Tokenizer tok; ParsedSource &product; diff --git a/src/loader/Tokenizer.cpp b/src/loader/Tokenizer.cpp index cdabe01..995a7e2 100644 --- a/src/loader/Tokenizer.cpp +++ b/src/loader/Tokenizer.cpp @@ -12,12 +12,27 @@ namespace loader { bool Tokenizer::HasMore() { - return std::istream::sentry(in); + ScanSpace(); + return in; +} + +void Tokenizer::ScanSpace() { + std::istream::char_type c; + in.get(c); + while (in && std::isspace(c)) { + if (c == '\n') { + ++line; + } + in.get(c); + } + if (in) { + in.putback(c); + } } void Tokenizer::Putback(const Token &t) { if (isPutback) { - throw LexerError("Tokenizer: double putback not supported"); + throw LexerError(line, "Tokenizer: double putback not supported"); } else { putback = t; isPutback = true; @@ -42,9 +57,9 @@ Tokenizer::Token Tokenizer::GetNext() { } Tokenizer::Token Tokenizer::ReadToken() { + ScanSpace(); std::istream::char_type c; in.get(c); - while (std::isspace(c)) in.get(c); switch (c) { case Token::ANGLE_BRACKET_OPEN: case Token::ANGLE_BRACKET_CLOSE: @@ -74,7 +89,7 @@ Tokenizer::Token Tokenizer::ReadToken() { } else if (std::islower(c)) { CheckKeyword(t); } else { - throw LexerError(std::string("Tokenizer: cannot parse token: ") + c); + throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c); } return t; } @@ -114,7 +129,7 @@ Tokenizer::Token Tokenizer::ReadString() { std::istream::char_type c; in.get(c); if (c != '"') { - throw LexerError("Tokenizer: strings must begin 
with '\"'"); + throw LexerError(line, "Tokenizer: strings must begin with '\"'"); } while (in.get(c)) { diff --git a/src/loader/Tokenizer.h b/src/loader/Tokenizer.h index b7ca72f..6dda20f 100644 --- a/src/loader/Tokenizer.h +++ b/src/loader/Tokenizer.h @@ -18,7 +18,7 @@ namespace loader { class Tokenizer { public: - explicit Tokenizer(std::istream &in) : in(in), isPutback(false) { } + explicit Tokenizer(std::istream &in) : in(in), line(1), isPutback(false) { } ~Tokenizer() { } private: Tokenizer(const Tokenizer &); @@ -60,15 +60,21 @@ public: class LexerError: public std::runtime_error { public: - explicit LexerError(const std::string &msg) : std::runtime_error(msg) { } + LexerError(int line, const std::string &msg) + : std::runtime_error(msg), line(line) { } + int Line() const { return line; } + private: + int line; }; bool HasMore(); Token GetNext(); const Token &Peek(); void Putback(const Token &); + int Line() const { return line; } private: + void ScanSpace(); Token ReadToken(); Token ReadNumber(); @@ -80,6 +86,7 @@ private: private: std::istream &in; Token putback; + int line; bool isPutback; }; -- 2.39.2