git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.h
864f9d2deab832e68627f724b519b5cc9ee8748e
[l2e.git] / src / loader / Tokenizer.h
1 /*
2  * Tokenizer.h
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
8 #ifndef LOADER_TOKENIZER_H_
9 #define LOADER_TOKENIZER_H_
10
11 #include <iosfwd>
12 #include <ostream>
13 #include <stdexcept>
14 #include <string>
15
16 namespace loader {
17
/// Splits the contents of an input stream into Token objects.
///
/// The tokenizer keeps a reference to the stream it was constructed with, so
/// that stream has to outlive the tokenizer. Instances cannot be copied.
class Tokenizer {

public:
	/// Binds the tokenizer to @p in. The line counter starts at 1, no token
	/// is pending as putback and the skipComments flag is switched on.
	explicit Tokenizer(std::istream &in)
	: in(in)
	, line(1)
	, isPutback(false)
	, skipComments(true) {
	}
	~Tokenizer() { }

	/// A single lexical unit: its kind plus an optional string/number payload.
	struct Token {

		/// Token kinds. Every enumerator except UNKNOWN carries a character
		/// code as its value; for the punctuation kinds that is the
		/// punctuation character itself.
		enum Type {
			UNKNOWN = 0,
			// note: the "angle bracket" kinds carry the curly brace characters
			ANGLE_BRACKET_OPEN = '{',
			ANGLE_BRACKET_CLOSE = '}',
			CHEVRON_OPEN = '<',
			CHEVRON_CLOSE = '>',
			COLON = ':',
			COMMA = ',',
			BRACKET_OPEN = '[',
			BRACKET_CLOSE = ']',
			PARENTHESIS_OPEN = '(',
			PARENTHESIS_CLOSE = ')',
			NUMBER = '0',
			STRING = '"',
			KEYWORD_EXPORT = 'e',
			KEYWORD_FALSE = 'f',
			KEYWORD_INCLUDE = 'i',
			KEYWORD_TRUE = 't',
			IDENTIFIER = 'x',
			TYPE_NAME = 'n',
			COMMENT = 'c',
			COMMAND = '$',
			REGISTER = '%',
			SCRIPT_BEGIN = 's',
			SCRIPT_END = 'S',
		};

		/// Creates an UNKNOWN token with an empty string and number 0.
		Token()
		: type(UNKNOWN)
		, number(0) {
		}
		/// Creates a token of kind @p t with an empty string and number 0.
		explicit Token(Type t)
		: type(t)
		, number(0) {
		}

		Type type;        ///< kind of this token
		std::string str;  ///< textual payload (presumably for string-like kinds -- confirm in Tokenizer.cpp)
		int number;       ///< numeric payload (presumably for NUMBER tokens -- confirm in Tokenizer.cpp)

	};

	/// Exception type that pairs an error message with a line number.
	class LexerError
	: public std::runtime_error {
	public:
		/// @param line line number to report via Line()
		/// @param msg  message forwarded to std::runtime_error
		LexerError(int line, const std::string &msg)
		: std::runtime_error(msg)
		, line(line) {
		}
		/// The line number given at construction.
		int Line() const {
			return line;
		}
	private:
		int line;
	};

	// The following members are only declared here; their definitions live
	// outside of this header.
	bool HasMore();
	Token GetNext();
	const Token &Peek();
	void Putback(const Token &);

	/// Current value of the line counter.
	int Line() const {
		return line;
	}

private:
	// Declared but never defined: copying a Tokenizer is not supported.
	Tokenizer(const Tokenizer &);
	Tokenizer &operator =(const Tokenizer &);

	// Scanning helpers, defined outside of this header.
	void ScanSpace();
	Token ReadToken();

	Token ReadNumber();
	Token ReadString();
	Token ReadIdentifier();

	Token ReadComment();
	Token ReadMultilineComment();

	bool CheckKeyword(Token &);

private:
	std::istream &in;   // source stream, not owned
	Token putback;      // storage for a put-back token (see isPutback)
	int line;           // line counter, starts at 1
	bool isPutback;     // starts false; presumably true while `putback` holds a pending token
	bool skipComments;  // starts true; presumably makes comments invisible to callers -- TODO confirm

};
103
104 inline const char *TokenTypeToString(Tokenizer::Token::Type t) {
105         switch (t) {
106                 case Tokenizer::Token::ANGLE_BRACKET_OPEN:
107                         return "ANGLE_BRACKET_OPEN";
108                 case Tokenizer::Token::ANGLE_BRACKET_CLOSE:
109                         return "ANGLE_BRACKET_CLOSE";
110                 case Tokenizer::Token::CHEVRON_OPEN:
111                         return "CHEVRON_OPEN";
112                 case Tokenizer::Token::CHEVRON_CLOSE:
113                         return "CHEVRON_CLOSE";
114                 case Tokenizer::Token::COLON:
115                         return "COLON";
116                 case Tokenizer::Token::COMMA:
117                         return "COMMA";
118                 case Tokenizer::Token::COMMAND:
119                         return "COMMAND";
120                 case Tokenizer::Token::BRACKET_OPEN:
121                         return "BRACKET_OPEN";
122                 case Tokenizer::Token::BRACKET_CLOSE:
123                         return "BRACKET_CLOSE";
124                 case Tokenizer::Token::PARENTHESIS_OPEN:
125                         return "PARENTHESIS_OPEN";
126                 case Tokenizer::Token::PARENTHESIS_CLOSE:
127                         return "PARENTHESIS_CLOSE";
128                 case Tokenizer::Token::NUMBER:
129                         return "NUMBER";
130                 case Tokenizer::Token::REGISTER:
131                         return "REGISTER";
132                 case Tokenizer::Token::SCRIPT_BEGIN:
133                         return "SCRIPT_BEGIN";
134                 case Tokenizer::Token::SCRIPT_END:
135                         return "SCRIPT_END";
136                 case Tokenizer::Token::STRING:
137                         return "STRING";
138                 case Tokenizer::Token::KEYWORD_EXPORT:
139                         return "KEYWORD_EXPORT";
140                 case Tokenizer::Token::KEYWORD_FALSE:
141                         return "KEYWORD_FALSE";
142                 case Tokenizer::Token::KEYWORD_INCLUDE:
143                         return "KEYWORD_INCLUDE";
144                 case Tokenizer::Token::KEYWORD_TRUE:
145                         return "KEYWORD_TRUE";
146                 case Tokenizer::Token::IDENTIFIER:
147                         return "IDENTIFIER";
148                 case Tokenizer::Token::TYPE_NAME:
149                         return "TYPE_NAME";
150                 default:
151                         return "UNKNOWN";
152         }
153 }
154
155 inline std::ostream &operator <<(std::ostream &out, Tokenizer::Token::Type t) {
156         out << TokenTypeToString(t);
157         return out;
158 }
159
160 }
161
162 #endif /* LOADER_TOKENIZER_H_ */