// l2e.git / Tokenizer.h
#ifndef LOADER_TOKENIZER_H_
#define LOADER_TOKENIZER_H_

#include <iosfwd>
#include <ostream>
#include <stdexcept>
#include <string>

namespace loader {

class Tokenizer {

public:
	explicit Tokenizer(std::istream &in)
	: in(in), line(1), isPutback(false), skipComments(true) { }
	~Tokenizer() { }
private:
	Tokenizer(const Tokenizer &);
	Tokenizer &operator =(const Tokenizer &);

public:
	struct Token {

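		// The Type values double as readable character codes: punctuation
		// tokens use the introducing character itself ('{', '"', '$', ...),
		// keywords and other word-like tokens a mnemonic letter.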
		enum Type {
			UNKNOWN = 0,
			ANGLE_BRACKET_OPEN = '{',
			ANGLE_BRACKET_CLOSE = '}',
			CHEVRON_OPEN = '<',
			CHEVRON_CLOSE = '>',
			COLON = ':',
			COMMA = ',',
			BRACKET_OPEN = '[',
			BRACKET_CLOSE = ']',
			PARENTHESIS_OPEN = '(',
			PARENTHESIS_CLOSE = ')',
			NUMBER = '0',
			STRING = '"',
			KEYWORD_EXPORT = 'e',
			KEYWORD_FALSE = 'f',
			KEYWORD_INCLUDE = 'i',
			KEYWORD_TRUE = 't',
			IDENTIFIER = 'x',
			TYPE_NAME = 'n',
			COMMENT = 'c',
			COMMAND = '$',
			REGISTER = '%',
			SCRIPT_BEGIN = 's',
			SCRIPT_END = 'S',
		};

		Token() : type(UNKNOWN), number(0) { }
		explicit Token(Type t) : type(t), number(0) { }

		Type type;
		std::string str;
		int number;

	};

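	// Error type for scanner failures; Line() gives the input line at
	// which the error occurred.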
	class LexerError: public std::runtime_error {
	public:
		LexerError(int line, const std::string &msg)
		: std::runtime_error(msg), line(line) { }
		int Line() const { return line; }
	private:
		int line;
	};

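	// Stream interface: GetNext() consumes and returns the next token,
	// Peek() returns it without consuming, and Putback() pushes a single
	// token back for re-reading. HasMore() reports whether input remains,
	// Line() the current (1-based) input line.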
	bool HasMore();
	Token GetNext();
	const Token &Peek();
	void Putback(const Token &);
	int Line() const { return line; }

private:
	void ScanSpace();
	Token ReadToken();

	Token ReadNumber();
	Token ReadString();
	Token ReadIdentifier();

	Token ReadComment();
	Token ReadMultilineComment();

	bool CheckKeyword(Token &);

private:
	std::istream &in;
	Token putback;
	int line;
	bool isPutback;
	bool skipComments;

};
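
// A minimal usage sketch (not taken from the repository; it assumes the
// member functions declared above are implemented in a corresponding
// Tokenizer.cpp, that LexerError is thrown on malformed input, and that
// the caller additionally includes <sstream> and <iostream>). The example
// input string is hypothetical and only illustrates the call sequence:
//
//   std::istringstream src("{ name: \"hero\", level: 42 }");
//   loader::Tokenizer tok(src);
//   try {
//           while (tok.HasMore()) {
//                   loader::Tokenizer::Token t = tok.GetNext();
//                   std::cout << tok.Line() << ": " << t.type << '\n';
//           }
//   } catch (const loader::Tokenizer::LexerError &e) {
//           std::cerr << "lexer error on line " << e.Line()
//                   << ": " << e.what() << '\n';
//   }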

inline const char *TokenTypeToString(Tokenizer::Token::Type t) {
	switch (t) {
		case Tokenizer::Token::ANGLE_BRACKET_OPEN:
			return "ANGLE_BRACKET_OPEN";
		case Tokenizer::Token::ANGLE_BRACKET_CLOSE:
			return "ANGLE_BRACKET_CLOSE";
		case Tokenizer::Token::CHEVRON_OPEN:
			return "CHEVRON_OPEN";
		case Tokenizer::Token::CHEVRON_CLOSE:
			return "CHEVRON_CLOSE";
		case Tokenizer::Token::COLON:
			return "COLON";
		case Tokenizer::Token::COMMA:
			return "COMMA";
		case Tokenizer::Token::COMMAND:
			return "COMMAND";
		case Tokenizer::Token::BRACKET_OPEN:
			return "BRACKET_OPEN";
		case Tokenizer::Token::BRACKET_CLOSE:
			return "BRACKET_CLOSE";
		case Tokenizer::Token::PARENTHESIS_OPEN:
			return "PARENTHESIS_OPEN";
		case Tokenizer::Token::PARENTHESIS_CLOSE:
			return "PARENTHESIS_CLOSE";
		case Tokenizer::Token::NUMBER:
			return "NUMBER";
		case Tokenizer::Token::REGISTER:
			return "REGISTER";
		case Tokenizer::Token::SCRIPT_BEGIN:
			return "SCRIPT_BEGIN";
		case Tokenizer::Token::SCRIPT_END:
			return "SCRIPT_END";
		case Tokenizer::Token::STRING:
			return "STRING";
		case Tokenizer::Token::KEYWORD_EXPORT:
			return "KEYWORD_EXPORT";
		case Tokenizer::Token::KEYWORD_FALSE:
			return "KEYWORD_FALSE";
		case Tokenizer::Token::KEYWORD_INCLUDE:
			return "KEYWORD_INCLUDE";
		case Tokenizer::Token::KEYWORD_TRUE:
			return "KEYWORD_TRUE";
		case Tokenizer::Token::IDENTIFIER:
			return "IDENTIFIER";
		case Tokenizer::Token::TYPE_NAME:
			return "TYPE_NAME";
		case Tokenizer::Token::COMMENT:
			return "COMMENT";
		default:
			return "UNKNOWN";
	}
}

inline std::ostream &operator <<(std::ostream &out, Tokenizer::Token::Type t) {
	out << TokenTypeToString(t);
	return out;
}

}

#endif