X-Git-Url: http://git.localhorst.tv/?a=blobdiff_plain;f=src%2Floader%2FTokenizer.cpp;h=91ad8b9da7ca263db7d84223a8de2cce8228972a;hb=963ed8ddb873e0638b55b38c5590db75db8254c8;hp=828f6ed131afe44118e8b86aead5b2c2235284ea;hpb=d1bfdd8fcb19f27d63400109810de168d0a8ab1a;p=l2e.git diff --git a/src/loader/Tokenizer.cpp b/src/loader/Tokenizer.cpp index 828f6ed..91ad8b9 100644 --- a/src/loader/Tokenizer.cpp +++ b/src/loader/Tokenizer.cpp @@ -12,19 +12,56 @@ namespace loader { bool Tokenizer::HasMore() { - return in; + if (isPutback) return true; + ScanSpace(); + if (!in) return false; + + putback = ReadToken(); + isPutback = true; + if (!skipComments || putback.type != Token::COMMENT) return true; + + while (in && putback.type == Token::COMMENT) { + putback = ReadToken(); + ScanSpace(); + } + return putback.type != Token::COMMENT; +} + +void Tokenizer::ScanSpace() { + std::istream::char_type c; + in.get(c); + while (in && std::isspace(c)) { + if (c == '\n') { + ++line; + } + in.get(c); + } + if (in) { + in.putback(c); + } } void Tokenizer::Putback(const Token &t) { if (isPutback) { - throw LexerError("Tokenizer: double putback not supported"); + throw LexerError(line, "Tokenizer: double putback not supported"); } else { putback = t; isPutback = true; } } +const Tokenizer::Token &Tokenizer::Peek() { + if (!isPutback) { + putback = GetNext(); + isPutback = true; + } + return putback; +} + Tokenizer::Token Tokenizer::GetNext() { + if (!HasMore()) { + throw LexerError(line, "read beyond last token"); + } if (isPutback) { isPutback = false; return putback; @@ -34,21 +71,41 @@ Tokenizer::Token Tokenizer::GetNext() { } Tokenizer::Token Tokenizer::ReadToken() { + ScanSpace(); std::istream::char_type c; in.get(c); - while (std::isspace(c)) in.get(c); switch (c) { case Token::ANGLE_BRACKET_OPEN: case Token::ANGLE_BRACKET_CLOSE: - case Token::CHEVRON_OPEN: - case Token::CHEVRON_CLOSE: case Token::COLON: case Token::COMMA: case Token::BRACKET_OPEN: case Token::BRACKET_CLOSE: case Token::PARENTHESIS_OPEN: case Token::PARENTHESIS_CLOSE: + case Token::COMMAND: + case Token::REGISTER: return Token ((Token::Type) c); + case '<': { + std::istream::char_type c2; + in.get(c2); + if (c2 == '<') { + return Token(Token::SCRIPT_BEGIN); + } else { + in.putback(c2); + return Token(Token::CHEVRON_OPEN); + } + } + case '>': { + std::istream::char_type c2; + in.get(c2); + if (c2 == '>') { + return Token(Token::SCRIPT_END); + } else { + in.putback(c2); + return Token(Token::CHEVRON_CLOSE); + } + } case '+': case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -57,6 +114,19 @@ Tokenizer::Token Tokenizer::ReadToken() { case '"': in.putback(c); return ReadString(); + case '/': + { + std::istream::char_type c2; + in.get(c2); + if (c2 == '/') { + return ReadComment(); + } else if (c2 == '*') { + return ReadMultilineComment(); + } else { + throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *"); + } + } + break; default: in.putback(c); { @@ -66,7 +136,7 @@ Tokenizer::Token Tokenizer::ReadToken() { } else if (std::islower(c)) { CheckKeyword(t); } else { - throw LexerError(std::string("Tokenizer: cannot parse token: ") + c); + throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c); } return t; } @@ -106,7 +176,7 @@ Tokenizer::Token Tokenizer::ReadString() { std::istream::char_type c; in.get(c); if (c != '"') { - throw LexerError("Tokenizer: strings must begin with '\"'"); + throw LexerError(line, "Tokenizer: strings must begin with '\"'"); } while (in.get(c)) { @@ -154,6 +224,28 @@ Tokenizer::Token Tokenizer::ReadIdentifier() { return t; } +Tokenizer::Token Tokenizer::ReadComment() { + std::istream::char_type c; + while (in.get(c) && c != '\n'); + ++line; + return Token(Token::COMMENT); +} + +Tokenizer::Token Tokenizer::ReadMultilineComment() { + std::istream::char_type c; + while (in.get(c)) { + if (c == '*') { + std::istream::char_type c2; + if (in.get(c2) && c2 == '/') { + break; + } + } else if (c == '\n') { + ++line; + } + } + return Token(Token::COMMENT); +} + bool Tokenizer::CheckKeyword(Token &t) { if (t.str == "export") { t.type = Token::KEYWORD_EXPORT;