X-Git-Url: http://git.localhorst.tv/?a=blobdiff_plain;f=src%2Floader%2FTokenizer.h;h=28d808f0da24d3ea05a42a58f271cacab8a4abd1;hb=242bdb40a032306ad2bf57487e7f23a0c308fe44;hp=f761e5fb725d590cc057b8709a4d94ca8bca091c;hpb=d1c2bdda89b4e5542fd09eaa548e5365f20c592a;p=l2e.git diff --git a/src/loader/Tokenizer.h b/src/loader/Tokenizer.h index f761e5f..28d808f 100644 --- a/src/loader/Tokenizer.h +++ b/src/loader/Tokenizer.h @@ -1,15 +1,9 @@ -/* - * Tokenizer.h - * - * Created on: Aug 26, 2012 - * Author: holy - */ - #ifndef LOADER_TOKENIZER_H_ #define LOADER_TOKENIZER_H_ #include #include +#include #include namespace loader { @@ -17,7 +11,8 @@ namespace loader { class Tokenizer { public: - Tokenizer(std::istream &in) : in(in), isPutback(false) { } + explicit Tokenizer(std::istream &in) + : in(in), line(1), isPutback(false), skipComments(true) { } ~Tokenizer() { } private: Tokenizer(const Tokenizer &); @@ -27,7 +22,7 @@ public: struct Token { enum Type { - UNKNOWN, + UNKNOWN = 0, ANGLE_BRACKET_OPEN = '{', ANGLE_BRACKET_CLOSE = '}', CHEVRON_OPEN = '<', @@ -36,18 +31,25 @@ public: COMMA = ',', BRACKET_OPEN = '[', BRACKET_CLOSE = ']', - NUMBER, - STRING, - KEYWORD_EXPORT, - KEYWORD_FALSE, - KEYWORD_INCLUDE, - KEYWORD_TRUE, - IDENTIFIER, - TYPE_NAME, + PARENTHESIS_OPEN = '(', + PARENTHESIS_CLOSE = ')', + NUMBER = '0', + STRING = '"', + KEYWORD_EXPORT = 'e', + KEYWORD_FALSE = 'f', + KEYWORD_INCLUDE = 'i', + KEYWORD_TRUE = 't', + IDENTIFIER = 'x', + TYPE_NAME = 'n', + COMMENT = 'c', + COMMAND = '$', + REGISTER = '%', + SCRIPT_BEGIN = 's', + SCRIPT_END = 'S', }; Token() : type(UNKNOWN), number(0) { } - Token(Type t) : type(t), number(0) { } + explicit Token(Type t) : type(t), number(0) { } Type type; std::string str; @@ -55,84 +57,99 @@ public: }; + class LexerError: public std::runtime_error { + public: + LexerError(int line, const std::string &msg) + : std::runtime_error(msg), line(line) { } + int Line() const { return line; } + private: + int line; + }; + bool HasMore(); Token 
GetNext(); + const Token &Peek(); void Putback(const Token &); + int Line() const { return line; } private: + void ScanSpace(); Token ReadToken(); Token ReadNumber(); Token ReadString(); Token ReadIdentifier(); + Token ReadComment(); + Token ReadMultilineComment(); + bool CheckKeyword(Token &); private: std::istream &in; Token putback; + int line; bool isPutback; + bool skipComments; }; - -std::ostream &operator <<(std::ostream &out, Tokenizer::Token::Type t) { +inline const char *TokenTypeToString(Tokenizer::Token::Type t) { switch (t) { case Tokenizer::Token::ANGLE_BRACKET_OPEN: - out << "ANGLE_BRACKET_OPEN"; - break; + return "ANGLE_BRACKET_OPEN"; case Tokenizer::Token::ANGLE_BRACKET_CLOSE: - out << "ANGLE_BRACKET_CLOSE"; - break; + return "ANGLE_BRACKET_CLOSE"; case Tokenizer::Token::CHEVRON_OPEN: - out << "CHEVRON_OPEN"; - break; + return "CHEVRON_OPEN"; case Tokenizer::Token::CHEVRON_CLOSE: - out << "CHEVRON_CLOSE"; - break; + return "CHEVRON_CLOSE"; case Tokenizer::Token::COLON: - out << "COLON"; - break; + return "COLON"; case Tokenizer::Token::COMMA: - out << "COMMA"; - break; + return "COMMA"; + case Tokenizer::Token::COMMAND: + return "COMMAND"; case Tokenizer::Token::BRACKET_OPEN: - out << "BRACKET_OPEN"; - break; + return "BRACKET_OPEN"; case Tokenizer::Token::BRACKET_CLOSE: - out << "BRACKET_CLOSE"; - break; + return "BRACKET_CLOSE"; + case Tokenizer::Token::PARENTHESIS_OPEN: + return "PARENTHESIS_OPEN"; + case Tokenizer::Token::PARENTHESIS_CLOSE: + return "PARENTHESIS_CLOSE"; case Tokenizer::Token::NUMBER: - out << "NUMBER"; - break; + return "NUMBER"; + case Tokenizer::Token::REGISTER: + return "REGISTER"; + case Tokenizer::Token::SCRIPT_BEGIN: + return "SCRIPT_BEGIN"; + case Tokenizer::Token::SCRIPT_END: + return "SCRIPT_END"; case Tokenizer::Token::STRING: - out << "STRING"; - break; + return "STRING"; case Tokenizer::Token::KEYWORD_EXPORT: - out << "KEYWORD_EXPORT"; - break; + return "KEYWORD_EXPORT"; case Tokenizer::Token::KEYWORD_FALSE: - out << 
"KEYWORD_FALSE"; - break; + return "KEYWORD_FALSE"; case Tokenizer::Token::KEYWORD_INCLUDE: - out << "KEYWORD_INCLUDE"; - break; + return "KEYWORD_INCLUDE"; case Tokenizer::Token::KEYWORD_TRUE: - out << "KEYWORD_TRUE"; - break; + return "KEYWORD_TRUE"; case Tokenizer::Token::IDENTIFIER: - out << "IDENTIFIER"; - break; + return "IDENTIFIER"; case Tokenizer::Token::TYPE_NAME: - out << "TYPE_NAME"; - break; + return "TYPE_NAME"; default: - out << "UNKNOWN"; - break; + return "UNKNOWN"; } +} + +inline std::ostream &operator <<(std::ostream &out, Tokenizer::Token::Type t) { + out << TokenTypeToString(t); return out; } } -#endif /* LOADER_TOKENIZER_H_ */ +#endif