]> git.localhorst.tv Git - l2e.git/blob - Tokenizer.cpp
4a8138c2adab622fb911adae564d1faf58d1210f
[l2e.git] / Tokenizer.cpp
#include "Tokenizer.h"

#include <cctype>
#include <istream>
#include <string>

5 namespace loader {
6
7 bool Tokenizer::HasMore() {
8         if (isPutback) return true;
9         ScanSpace();
10         if (!in) return false;
11
12         putback = ReadToken();
13         isPutback = true;
14         if (!skipComments || putback.type != Token::COMMENT) return true;
15
16         while (in && putback.type == Token::COMMENT) {
17                 putback = ReadToken();
18                 ScanSpace();
19         }
20         return putback.type != Token::COMMENT;
21 }
22
23 void Tokenizer::ScanSpace() {
24         std::istream::char_type c;
25         in.get(c);
26         while (in && std::isspace(c)) {
27                 if (c == '\n') {
28                         ++line;
29                 }
30                 in.get(c);
31         }
32         if (in) {
33                 in.putback(c);
34         }
35 }
36
37 void Tokenizer::Putback(const Token &t) {
38         if (isPutback) {
39                 throw LexerError(line, "Tokenizer: double putback not supported");
40         } else {
41                 putback = t;
42                 isPutback = true;
43         }
44 }
45
46 const Tokenizer::Token &Tokenizer::Peek() {
47         if (!isPutback) {
48                 putback = GetNext();
49                 isPutback = true;
50         }
51         return putback;
52 }
53
54 Tokenizer::Token Tokenizer::GetNext() {
55         if (!HasMore()) {
56                 throw LexerError(line, "read beyond last token");
57         }
58         if (isPutback) {
59                 isPutback = false;
60                 return putback;
61         } else {
62                 return ReadToken();
63         }
64 }
65
66 Tokenizer::Token Tokenizer::ReadToken() {
67         ScanSpace();
68         std::istream::char_type c;
69         in.get(c);
70         switch (c) {
71                 case Token::ANGLE_BRACKET_OPEN:
72                 case Token::ANGLE_BRACKET_CLOSE:
73                 case Token::COLON:
74                 case Token::COMMA:
75                 case Token::BRACKET_OPEN:
76                 case Token::BRACKET_CLOSE:
77                 case Token::PARENTHESIS_OPEN:
78                 case Token::PARENTHESIS_CLOSE:
79                 case Token::COMMAND:
80                 case Token::REGISTER:
81                         return Token ((Token::Type) c);
82                 case '<': {
83                         std::istream::char_type c2;
84                         in.get(c2);
85                         if (c2 == '<') {
86                                 return Token(Token::SCRIPT_BEGIN);
87                         } else {
88                                 in.putback(c2);
89                                 return Token(Token::CHEVRON_OPEN);
90                         }
91                 }
92                 case '>': {
93                         std::istream::char_type c2;
94                         in.get(c2);
95                         if (c2 == '>') {
96                                 return Token(Token::SCRIPT_END);
97                         } else {
98                                 in.putback(c2);
99                                 return Token(Token::CHEVRON_CLOSE);
100                         }
101                 }
102                 case '+': case '-':
103                 case '0': case '1': case '2': case '3': case '4':
104                 case '5': case '6': case '7': case '8': case '9':
105                         in.putback(c);
106                         return ReadNumber();
107                 case '"':
108                         in.putback(c);
109                         return ReadString();
110                 case '/':
111                         {
112                                 std::istream::char_type c2;
113                                 in.get(c2);
114                                 if (c2 == '/') {
115                                         return ReadComment();
116                                 } else if (c2 == '*') {
117                                         return ReadMultilineComment();
118                                 } else {
119                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *");
120                                 }
121                         }
122                         break;
123                 default:
124                         in.putback(c);
125                         {
126                                 Token t(ReadIdentifier());
127                                 if (std::isupper(c)) {
128                                         t.type = Token::TYPE_NAME;
129                                 } else if (std::islower(c)) {
130                                         CheckKeyword(t);
131                                 } else {
132                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c);
133                                 }
134                                 return t;
135                         }
136         }
137 }
138
139 Tokenizer::Token Tokenizer::ReadNumber() {
140         Token t(Token::NUMBER);
141         bool isNegative(false);
142
143         std::istream::char_type c;
144         in.get(c);
145         if (c == '-') {
146                 isNegative = true;
147         } else if (c != '+') {
148                 in.putback(c);
149         }
150
151         while (in.get(c)) {
152                 if (!std::isdigit(c)) {
153                         in.putback(c);
154                         break;
155                 }
156                 t.number *= 10;
157                 t.number += c - '0';
158         }
159
160         if (isNegative) t.number *= -1;
161
162         return t;
163 }
164
165 Tokenizer::Token Tokenizer::ReadString() {
166         Token t(Token::STRING);
167         bool escape(false);
168
169         std::istream::char_type c;
170         in.get(c);
171         if (c != '"') {
172                 throw LexerError(line, "Tokenizer: strings must begin with '\"'");
173         }
174
175         while (in.get(c)) {
176                 if (escape) {
177                         escape = false;
178                         switch (c) {
179                                 case 'n':
180                                         t.str.push_back('\n');
181                                         break;
182                                 case 'r':
183                                         t.str.push_back('\r');
184                                         break;
185                                 case 't':
186                                         t.str.push_back('\t');
187                                         break;
188                                 default:
189                                         t.str.push_back(c);
190                                         break;
191                         }
192                 } else if (c == '"') {
193                         break;
194                 } else if (c == '\\') {
195                         escape = true;
196                 } else {
197                         t.str.push_back(c);
198                 }
199         }
200
201         return t;
202 }
203
204 Tokenizer::Token Tokenizer::ReadIdentifier() {
205         Token t(Token::IDENTIFIER);
206
207         std::istream::char_type c;
208         while (in.get(c)) {
209                 if (std::isalnum(c) || c == '_') {
210                         t.str.push_back(c);
211                 } else {
212                         in.putback(c);
213                         break;
214                 }
215         }
216
217         return t;
218 }
219
220 Tokenizer::Token Tokenizer::ReadComment() {
221         std::istream::char_type c;
222         while (in.get(c) && c != '\n');
223         ++line;
224         return Token(Token::COMMENT);
225 }
226
227 Tokenizer::Token Tokenizer::ReadMultilineComment() {
228         std::istream::char_type c;
229         while (in.get(c)) {
230                 if (c == '*') {
231                         std::istream::char_type c2;
232                         if (in.get(c2) && c2 == '/') {
233                                 break;
234                         }
235                 } else if (c == '\n') {
236                         ++line;
237                 }
238         }
239         return Token(Token::COMMENT);
240 }
241
242 bool Tokenizer::CheckKeyword(Token &t) {
243         if (t.str == "export") {
244                 t.type = Token::KEYWORD_EXPORT;
245                 return true;
246         } else if (t.str == "false") {
247                 t.type = Token::KEYWORD_FALSE;
248                 return true;
249         } else if (t.str == "include") {
250                 t.type = Token::KEYWORD_INCLUDE;
251                 return true;
252         } else if (t.str == "true") {
253                 t.type = Token::KEYWORD_TRUE;
254                 return true;
255         } else {
256                 return false;
257         }
258 }
259
260 }