X-Git-Url: http://git.localhorst.tv/?a=blobdiff_plain;f=src%2Floader%2FTokenizer.cpp;h=4a8138c2adab622fb911adae564d1faf58d1210f;hb=cc3d698b8c1ad09d7a3f9e3f28bc84e0ac1735ea;hp=76a3a14e5fc67b3a8c10f9706ed30d84af6863ec;hpb=996552758ec3bc748081b65b26b4a61bcd619324;p=l2e.git diff --git a/src/loader/Tokenizer.cpp b/src/loader/Tokenizer.cpp index 76a3a14..4a8138c 100644 --- a/src/loader/Tokenizer.cpp +++ b/src/loader/Tokenizer.cpp @@ -1,31 +1,60 @@ -/* - * Tokenizer.cpp - * - * Created on: Aug 26, 2012 - * Author: holy - */ - #include "Tokenizer.h" #include -#include namespace loader { bool Tokenizer::HasMore() { - return in; + if (isPutback) return true; + ScanSpace(); + if (!in) return false; + + putback = ReadToken(); + isPutback = true; + if (!skipComments || putback.type != Token::COMMENT) return true; + + while (in && putback.type == Token::COMMENT) { + putback = ReadToken(); + ScanSpace(); + } + return putback.type != Token::COMMENT; +} + +void Tokenizer::ScanSpace() { + std::istream::char_type c; + in.get(c); + while (in && std::isspace(c)) { + if (c == '\n') { + ++line; + } + in.get(c); + } + if (in) { + in.putback(c); + } } void Tokenizer::Putback(const Token &t) { if (isPutback) { - throw std::runtime_error("Tokenizer: double putback not supported"); + throw LexerError(line, "Tokenizer: double putback not supported"); } else { putback = t; isPutback = true; } } +const Tokenizer::Token &Tokenizer::Peek() { + if (!isPutback) { + putback = GetNext(); + isPutback = true; + } + return putback; +} + Tokenizer::Token Tokenizer::GetNext() { + if (!HasMore()) { + throw LexerError(line, "read beyond last token"); + } if (isPutback) { isPutback = false; return putback; @@ -35,19 +64,41 @@ Tokenizer::Token Tokenizer::GetNext() { } Tokenizer::Token Tokenizer::ReadToken() { + ScanSpace(); std::istream::char_type c; in.get(c); - while (std::isspace(c)) in.get(c); switch (c) { case Token::ANGLE_BRACKET_OPEN: case Token::ANGLE_BRACKET_CLOSE: - case Token::CHEVRON_OPEN: - case Token::CHEVRON_CLOSE: case Token::COLON: case Token::COMMA: case Token::BRACKET_OPEN: case Token::BRACKET_CLOSE: + case Token::PARENTHESIS_OPEN: + case Token::PARENTHESIS_CLOSE: + case Token::COMMAND: + case Token::REGISTER: return Token ((Token::Type) c); + case '<': { + std::istream::char_type c2; + in.get(c2); + if (c2 == '<') { + return Token(Token::SCRIPT_BEGIN); + } else { + in.putback(c2); + return Token(Token::CHEVRON_OPEN); + } + } + case '>': { + std::istream::char_type c2; + in.get(c2); + if (c2 == '>') { + return Token(Token::SCRIPT_END); + } else { + in.putback(c2); + return Token(Token::CHEVRON_CLOSE); + } + } case '+': case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -56,6 +107,19 @@ Tokenizer::Token Tokenizer::ReadToken() { case '"': in.putback(c); return ReadString(); + case '/': + { + std::istream::char_type c2; + in.get(c2); + if (c2 == '/') { + return ReadComment(); + } else if (c2 == '*') { + return ReadMultilineComment(); + } else { + throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *"); + } + } + break; default: in.putback(c); { @@ -65,7 +129,7 @@ Tokenizer::Token Tokenizer::ReadToken() { } else if (std::islower(c)) { CheckKeyword(t); } else { - throw std::runtime_error(std::string("Tokenizer: cannot parse token: ") + c); + throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c); } return t; } @@ -105,7 +169,7 @@ Tokenizer::Token Tokenizer::ReadString() { std::istream::char_type c; in.get(c); if (c != '"') { - throw std::runtime_error("Tokenizer: strings must begin with '\"'"); + throw LexerError(line, "Tokenizer: strings must begin with '\"'"); } while (in.get(c)) { @@ -153,6 +217,28 @@ Tokenizer::Token Tokenizer::ReadIdentifier() { return t; } +Tokenizer::Token Tokenizer::ReadComment() { + std::istream::char_type c; + while (in.get(c) && c != '\n'); + ++line; + return Token(Token::COMMENT); +} + +Tokenizer::Token Tokenizer::ReadMultilineComment() { + std::istream::char_type c; + while (in.get(c)) { + if (c == '*') { + std::istream::char_type c2; + if (in.get(c2) && c2 == '/') { + break; + } + } else if (c == '\n') { + ++line; + } + } + return Token(Token::COMMENT); +} + bool Tokenizer::CheckKeyword(Token &t) { if (t.str == "export") { t.type = Token::KEYWORD_EXPORT;