]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.cpp
added script assembler
[l2e.git] / src / loader / Tokenizer.cpp
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
8 #include "Tokenizer.h"
9
10 #include <istream>
11
12 namespace loader {
13
14 bool Tokenizer::HasMore() {
15         if (isPutback) return true;
16         ScanSpace();
17         if (!in) return false;
18
19         putback = ReadToken();
20         isPutback = true;
21         if (!skipComments || putback.type != Token::COMMENT) return true;
22
23         while (in && putback.type == Token::COMMENT) {
24                 putback = ReadToken();
25                 ScanSpace();
26         }
27         return putback.type != Token::COMMENT;
28 }
29
30 void Tokenizer::ScanSpace() {
31         std::istream::char_type c;
32         in.get(c);
33         while (in && std::isspace(c)) {
34                 if (c == '\n') {
35                         ++line;
36                 }
37                 in.get(c);
38         }
39         if (in) {
40                 in.putback(c);
41         }
42 }
43
44 void Tokenizer::Putback(const Token &t) {
45         if (isPutback) {
46                 throw LexerError(line, "Tokenizer: double putback not supported");
47         } else {
48                 putback = t;
49                 isPutback = true;
50         }
51 }
52
53 const Tokenizer::Token &Tokenizer::Peek() {
54         if (!isPutback) {
55                 putback = GetNext();
56                 isPutback = true;
57         }
58         return putback;
59 }
60
61 Tokenizer::Token Tokenizer::GetNext() {
62         if (!HasMore()) {
63                 throw LexerError(line, "read beyond last token");
64         }
65         if (isPutback) {
66                 isPutback = false;
67                 return putback;
68         } else {
69                 return ReadToken();
70         }
71 }
72
73 Tokenizer::Token Tokenizer::ReadToken() {
74         ScanSpace();
75         std::istream::char_type c;
76         in.get(c);
77         switch (c) {
78                 case Token::ANGLE_BRACKET_OPEN:
79                 case Token::ANGLE_BRACKET_CLOSE:
80                 case Token::COLON:
81                 case Token::COMMA:
82                 case Token::BRACKET_OPEN:
83                 case Token::BRACKET_CLOSE:
84                 case Token::PARENTHESIS_OPEN:
85                 case Token::PARENTHESIS_CLOSE:
86                 case Token::COMMAND:
87                 case Token::REGISTER:
88                         return Token ((Token::Type) c);
89                 case '<': {
90                         std::istream::char_type c2;
91                         in.get(c2);
92                         if (c2 == '<') {
93                                 return Token(Token::SCRIPT_BEGIN);
94                         } else {
95                                 in.putback(c2);
96                                 return Token(Token::CHEVRON_OPEN);
97                         }
98                 }
99                 case '>': {
100                         std::istream::char_type c2;
101                         in.get(c2);
102                         if (c2 == '>') {
103                                 return Token(Token::SCRIPT_END);
104                         } else {
105                                 in.putback(c2);
106                                 return Token(Token::CHEVRON_CLOSE);
107                         }
108                 }
109                 case '+': case '-':
110                 case '0': case '1': case '2': case '3': case '4':
111                 case '5': case '6': case '7': case '8': case '9':
112                         in.putback(c);
113                         return ReadNumber();
114                 case '"':
115                         in.putback(c);
116                         return ReadString();
117                 case '/':
118                         {
119                                 std::istream::char_type c2;
120                                 in.get(c2);
121                                 if (c2 == '/') {
122                                         return ReadComment();
123                                 } else if (c2 == '*') {
124                                         return ReadMultilineComment();
125                                 } else {
126                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *");
127                                 }
128                         }
129                         break;
130                 default:
131                         in.putback(c);
132                         {
133                                 Token t(ReadIdentifier());
134                                 if (std::isupper(c)) {
135                                         t.type = Token::TYPE_NAME;
136                                 } else if (std::islower(c)) {
137                                         CheckKeyword(t);
138                                 } else {
139                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c);
140                                 }
141                                 return t;
142                         }
143         }
144 }
145
146 Tokenizer::Token Tokenizer::ReadNumber() {
147         Token t(Token::NUMBER);
148         bool isNegative(false);
149
150         std::istream::char_type c;
151         in.get(c);
152         if (c == '-') {
153                 isNegative = true;
154         } else if (c != '+') {
155                 in.putback(c);
156         }
157
158         while (in.get(c)) {
159                 if (!std::isdigit(c)) {
160                         in.putback(c);
161                         break;
162                 }
163                 t.number *= 10;
164                 t.number += c - '0';
165         }
166
167         if (isNegative) t.number *= -1;
168
169         return t;
170 }
171
172 Tokenizer::Token Tokenizer::ReadString() {
173         Token t(Token::STRING);
174         bool escape(false);
175
176         std::istream::char_type c;
177         in.get(c);
178         if (c != '"') {
179                 throw LexerError(line, "Tokenizer: strings must begin with '\"'");
180         }
181
182         while (in.get(c)) {
183                 if (escape) {
184                         escape = false;
185                         switch (c) {
186                                 case 'n':
187                                         t.str.push_back('\n');
188                                         break;
189                                 case 'r':
190                                         t.str.push_back('\r');
191                                         break;
192                                 case 't':
193                                         t.str.push_back('\t');
194                                         break;
195                                 default:
196                                         t.str.push_back(c);
197                                         break;
198                         }
199                 } else if (c == '"') {
200                         break;
201                 } else if (c == '\\') {
202                         escape = true;
203                 } else {
204                         t.str.push_back(c);
205                 }
206         }
207
208         return t;
209 }
210
211 Tokenizer::Token Tokenizer::ReadIdentifier() {
212         Token t(Token::IDENTIFIER);
213
214         std::istream::char_type c;
215         while (in.get(c)) {
216                 if (std::isalnum(c) || c == '_') {
217                         t.str.push_back(c);
218                 } else {
219                         in.putback(c);
220                         break;
221                 }
222         }
223
224         return t;
225 }
226
227 Tokenizer::Token Tokenizer::ReadComment() {
228         std::istream::char_type c;
229         while (in.get(c) && c != '\n');
230         ++line;
231         return Token(Token::COMMENT);
232 }
233
234 Tokenizer::Token Tokenizer::ReadMultilineComment() {
235         std::istream::char_type c;
236         while (in.get(c)) {
237                 if (c == '*') {
238                         std::istream::char_type c2;
239                         if (in.get(c2) && c2 == '/') {
240                                 break;
241                         }
242                 } else if (c == '\n') {
243                         ++line;
244                 }
245         }
246         return Token(Token::COMMENT);
247 }
248
249 bool Tokenizer::CheckKeyword(Token &t) {
250         if (t.str == "export") {
251                 t.type = Token::KEYWORD_EXPORT;
252                 return true;
253         } else if (t.str == "false") {
254                 t.type = Token::KEYWORD_FALSE;
255                 return true;
256         } else if (t.str == "include") {
257                 t.type = Token::KEYWORD_INCLUDE;
258                 return true;
259         } else if (t.str == "true") {
260                 t.type = Token::KEYWORD_TRUE;
261                 return true;
262         } else {
263                 return false;
264         }
265 }
266
267 }