git.localhorst.tv Git - l2e.git/blobdiff - src/loader/Tokenizer.cpp
removed stupid file headers that eclipse put in
[l2e.git] / src / loader / Tokenizer.cpp
index 828f6ed131afe44118e8b86aead5b2c2235284ea..4a8138c2adab622fb911adae564d1faf58d1210f 100644 (file)
@@ -1,10 +1,3 @@
-/*
- * Tokenizer.cpp
- *
- *  Created on: Aug 26, 2012
- *      Author: holy
- */
-
 #include "Tokenizer.h"
 
 #include <istream>
 namespace loader {
 
 bool Tokenizer::HasMore() {
-       return in;
+       if (isPutback) return true;
+       ScanSpace();
+       if (!in) return false;
+
+       putback = ReadToken();
+       isPutback = true;
+       if (!skipComments || putback.type != Token::COMMENT) return true;
+
+       while (in && putback.type == Token::COMMENT) {
+               putback = ReadToken();
+               ScanSpace();
+       }
+       return putback.type != Token::COMMENT;
+}
+
+// Consumes leading whitespace from the input stream, incrementing the line
+// counter for every '\n' that is consumed. The first non-whitespace
+// character, if there is one, is put back so the next read sees it.
+void Tokenizer::ScanSpace() {
+       std::istream::char_type c;
+       in.get(c);
+       // std::isspace is undefined for negative values other than EOF, so the
+       // character is converted to unsigned char before classification.
+       while (in && std::isspace(static_cast<unsigned char>(c))) {
+               if (c == '\n') {
+                       ++line;
+               }
+               in.get(c);
+       }
+       if (in) {
+               in.putback(c);
+       }
 }
 
 void Tokenizer::Putback(const Token &t) {
+       // Only one token can be buffered at a time; a second putback while one
+       // is still pending is reported as a LexerError carrying the current line.
        if (isPutback) {
-               throw LexerError("Tokenizer: double putback not supported");
+               throw LexerError(line, "Tokenizer: double putback not supported");
        } else {
+               // Store the token and mark the putback slot as occupied.
                putback = t;
                isPutback = true;
        }
 }
 
+// Returns a reference to the next token without consuming it. If no token is
+// buffered yet, one is fetched through GetNext() and stored in the putback
+// slot so that the following GetNext() call returns the same token.
+const Tokenizer::Token &Tokenizer::Peek() {
+       if (isPutback) {
+               return putback;
+       }
+       putback = GetNext();
+       isPutback = true;
+       return putback;
+}
+
 Tokenizer::Token Tokenizer::GetNext() {
+       if (!HasMore()) {
+               throw LexerError(line, "read beyond last token");
+       }
        if (isPutback) {
                isPutback = false;
                return putback;
@@ -34,21 +64,41 @@ Tokenizer::Token Tokenizer::GetNext() {
 }
 
 Tokenizer::Token Tokenizer::ReadToken() {
+       ScanSpace();
        std::istream::char_type c;
        in.get(c);
-       while (std::isspace(c)) in.get(c);
        switch (c) {
                case Token::ANGLE_BRACKET_OPEN:
                case Token::ANGLE_BRACKET_CLOSE:
-               case Token::CHEVRON_OPEN:
-               case Token::CHEVRON_CLOSE:
                case Token::COLON:
                case Token::COMMA:
                case Token::BRACKET_OPEN:
                case Token::BRACKET_CLOSE:
                case Token::PARENTHESIS_OPEN:
                case Token::PARENTHESIS_CLOSE:
+               case Token::COMMAND:
+               case Token::REGISTER:
                        return Token ((Token::Type) c);
+               case '<': {
+                       std::istream::char_type c2;
+                       in.get(c2);
+                       if (c2 == '<') {
+                               return Token(Token::SCRIPT_BEGIN);
+                       } else {
+                               in.putback(c2);
+                               return Token(Token::CHEVRON_OPEN);
+                       }
+               }
+               case '>': {
+                       std::istream::char_type c2;
+                       in.get(c2);
+                       if (c2 == '>') {
+                               return Token(Token::SCRIPT_END);
+                       } else {
+                               in.putback(c2);
+                               return Token(Token::CHEVRON_CLOSE);
+                       }
+               }
                case '+': case '-':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
@@ -57,6 +107,19 @@ Tokenizer::Token Tokenizer::ReadToken() {
                case '"':
                        in.putback(c);
                        return ReadString();
+               case '/':
+                       {
+                               std::istream::char_type c2;
+                               in.get(c2);
+                               if (c2 == '/') {
+                                       return ReadComment();
+                               } else if (c2 == '*') {
+                                       return ReadMultilineComment();
+                               } else {
+                                       throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *");
+                               }
+                       }
+                       break;
                default:
                        in.putback(c);
                        {
@@ -66,7 +129,7 @@ Tokenizer::Token Tokenizer::ReadToken() {
                                } else if (std::islower(c)) {
                                        CheckKeyword(t);
                                } else {
-                                       throw LexerError(std::string("Tokenizer: cannot parse token: ") + c);
+                                       throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c);
                                }
                                return t;
                        }
@@ -106,7 +169,7 @@ Tokenizer::Token Tokenizer::ReadString() {
        std::istream::char_type c;
        in.get(c);
        if (c != '"') {
-               throw LexerError("Tokenizer: strings must begin with '\"'");
+               throw LexerError(line, "Tokenizer: strings must begin with '\"'");
        }
 
        while (in.get(c)) {
@@ -154,6 +217,28 @@ Tokenizer::Token Tokenizer::ReadIdentifier() {
        return t;
 }
 
+// Skips the rest of a single-line comment, up to and including the
+// terminating '\n' or the end of input, and returns a COMMENT token.
+Tokenizer::Token Tokenizer::ReadComment() {
+       std::istream::char_type c;
+       while (in.get(c)) {
+               if (c == '\n') {
+                       // Count the line only when a newline was really consumed;
+                       // a comment that runs into the end of input adds no line.
+                       ++line;
+                       break;
+               }
+       }
+       return Token(Token::COMMENT);
+}
+
+// Skips the body of a block comment up to and including the closing "*/", or
+// up to the end of input if the comment is never closed. Every '\n' that is
+// consumed increments the line counter. Returns a COMMENT token.
+Tokenizer::Token Tokenizer::ReadMultilineComment() {
+       std::istream::char_type c;
+       // Remembers whether the previous character was '*'. Tracking this as
+       // state, rather than reading a second character after each '*', keeps
+       // runs such as "**/" recognisable as a terminator and makes sure a
+       // newline that directly follows a '*' is still counted.
+       bool afterStar = false;
+       while (in.get(c)) {
+               if (afterStar && c == '/') {
+                       break;
+               }
+               afterStar = (c == '*');
+               if (c == '\n') {
+                       ++line;
+               }
+       }
+       return Token(Token::COMMENT);
+}
+
 bool Tokenizer::CheckKeyword(Token &t) {
        if (t.str == "export") {
                t.type = Token::KEYWORD_EXPORT;