]> git.localhorst.tv Git - l2e.git/blobdiff - src/loader/Tokenizer.h
removed stupid file headers that eclipse put in
[l2e.git] / src / loader / Tokenizer.h
index f363af70bd0169684a247c3bb45262b6dbe6542a..06103b7d8b6f41378a32839405f1d58df4964755 100644 (file)
@@ -1,14 +1,9 @@
-/*
- * Tokenizer.h
- *
- *  Created on: Aug 26, 2012
- *      Author: holy
- */
-
 #ifndef LOADER_TOKENIZER_H_
 #define LOADER_TOKENIZER_H_
 
 #include <iosfwd>
+#include <ostream>
+#include <stdexcept>
 #include <string>
 
 namespace loader {
@@ -16,7 +11,8 @@ namespace loader {
 class Tokenizer {
 
 public:
-       Tokenizer(std::istream &in) : in(in), isPutback(false) { }
+       explicit Tokenizer(std::istream &in) // binds to in by reference; the stream is not copied
+       : in(in), line(1), isPutback(false), skipComments(true) { } // line counter starts at 1, no pending putback, skipComments set
        ~Tokenizer() { }
 private:
        Tokenizer(const Tokenizer &);
@@ -26,7 +22,7 @@ public:
        struct Token {
 
                enum Type {
-                       UNKNOWN,
+                       UNKNOWN = 0,
                        ANGLE_BRACKET_OPEN = '{',
                        ANGLE_BRACKET_CLOSE = '}',
                        CHEVRON_OPEN = '<',
@@ -35,18 +31,25 @@ public:
                        COMMA = ',',
                        BRACKET_OPEN = '[',
                        BRACKET_CLOSE = ']',
-                       NUMBER,
-                       STRING,
-                       KEYWORD_EXPORT,
-                       KEYWORD_FALSE,
-                       KEYWORD_INCLUDE,
-                       KEYWORD_TRUE,
-                       IDENTIFIER,
-                       TYPE_NAME,
+                       PARENTHESIS_OPEN = '(',
+                       PARENTHESIS_CLOSE = ')',
+                       NUMBER = '0',
+                       STRING = '"',
+                       KEYWORD_EXPORT = 'e',
+                       KEYWORD_FALSE = 'f',
+                       KEYWORD_INCLUDE = 'i',
+                       KEYWORD_TRUE = 't',
+                       IDENTIFIER = 'x',
+                       TYPE_NAME = 'n',
+                       COMMENT = 'c',
+                       COMMAND = '$',
+                       REGISTER = '%',
+                       SCRIPT_BEGIN = 's',
+                       SCRIPT_END = 'S',
                };
 
                Token() : type(UNKNOWN), number(0) { }
-               Token(Type t) : type(t), number(0) { }
+               explicit Token(Type t) : type(t), number(0) { }
 
                Type type;
                std::string str;
@@ -54,26 +57,99 @@ public:
 
        };
 
+       /// Runtime error that also records the line number passed to its constructor.
+       class LexerError: public std::runtime_error {
+               int lineNo;
+       public:
+               LexerError(int line, const std::string &msg)
+               : std::runtime_error(msg), lineNo(line) { }
+               int Line() const { return lineNo; }
+       };
+
        bool HasMore();
        Token GetNext();
+       const Token &Peek();
        void Putback(const Token &);
+       int Line() const { return line; }
 
 private:
+       void ScanSpace();
        Token ReadToken();
 
        Token ReadNumber();
        Token ReadString();
        Token ReadIdentifier();
 
+       Token ReadComment();
+       Token ReadMultilineComment();
+
        bool CheckKeyword(Token &);
 
 private:
        std::istream &in;
        Token putback;
+       int line;
        bool isPutback;
+       bool skipComments;
 
 };
 
+/**
+ * Maps a token type to the name of its enumerator.
+ *
+ * COMMENT has its own case so that it is reported by name rather than
+ * through the default branch.
+ *
+ * @param t token type to name
+ * @return string literal with the enumerator's name; "UNKNOWN" for
+ *         Token::UNKNOWN and for any value without a case below
+ */
+inline const char *TokenTypeToString(Tokenizer::Token::Type t) {
+       switch (t) {
+               // single-character punctuation
+               case Tokenizer::Token::ANGLE_BRACKET_OPEN: return "ANGLE_BRACKET_OPEN";
+               case Tokenizer::Token::ANGLE_BRACKET_CLOSE: return "ANGLE_BRACKET_CLOSE";
+               case Tokenizer::Token::CHEVRON_OPEN: return "CHEVRON_OPEN";
+               case Tokenizer::Token::CHEVRON_CLOSE: return "CHEVRON_CLOSE";
+               case Tokenizer::Token::COLON: return "COLON";
+               case Tokenizer::Token::COMMA: return "COMMA";
+               case Tokenizer::Token::BRACKET_OPEN: return "BRACKET_OPEN";
+               case Tokenizer::Token::BRACKET_CLOSE: return "BRACKET_CLOSE";
+               case Tokenizer::Token::PARENTHESIS_OPEN: return "PARENTHESIS_OPEN";
+               case Tokenizer::Token::PARENTHESIS_CLOSE: return "PARENTHESIS_CLOSE";
+
+               // number and string
+               case Tokenizer::Token::NUMBER: return "NUMBER";
+               case Tokenizer::Token::STRING: return "STRING";
+
+               // keywords
+               case Tokenizer::Token::KEYWORD_EXPORT: return "KEYWORD_EXPORT";
+               case Tokenizer::Token::KEYWORD_FALSE: return "KEYWORD_FALSE";
+               case Tokenizer::Token::KEYWORD_INCLUDE: return "KEYWORD_INCLUDE";
+               case Tokenizer::Token::KEYWORD_TRUE: return "KEYWORD_TRUE";
+
+               // identifiers and type names
+               case Tokenizer::Token::IDENTIFIER: return "IDENTIFIER";
+               case Tokenizer::Token::TYPE_NAME: return "TYPE_NAME";
+
+               // command, register and script delimiters
+               case Tokenizer::Token::COMMAND: return "COMMAND";
+               case Tokenizer::Token::REGISTER: return "REGISTER";
+               case Tokenizer::Token::SCRIPT_BEGIN: return "SCRIPT_BEGIN";
+               case Tokenizer::Token::SCRIPT_END: return "SCRIPT_END";
+
+               // comment tokens
+               case Tokenizer::Token::COMMENT: return "COMMENT";
+
+               default: return "UNKNOWN";
+       }
+}
+
+/// Writes the name returned by TokenTypeToString(t) to out and returns out.
+inline std::ostream &operator <<(std::ostream &out, Tokenizer::Token::Type t) {
+       return out << TokenTypeToString(t);
+}
+
 }
 
 #endif /* LOADER_TOKENIZER_H_ */