]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.cpp
Merge branch 'loader'
[l2e.git] / src / loader / Tokenizer.cpp
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
#include "Tokenizer.h"

#include <cctype>
#include <istream>
11
12 namespace loader {
13
/// Checks whether another token can be read from the stream.
///
/// On success the next token has been read ahead into `putback` with
/// `isPutback` set, so a following GetNext()/Peek() will not touch the
/// stream again.  When `skipComments` is set, whole runs of COMMENT
/// tokens are consumed here so callers never see them.
bool Tokenizer::HasMore() {
	if (isPutback) return true;
	ScanSpace();
	if (!in) return false;

	putback = ReadToken();
	isPutback = true;
	if (!skipComments || putback.type != Token::COMMENT) return true;

	// Skip consecutive comments until a real token or end of stream.
	// NOTE(review): if the stream ends while the lookahead still holds
	// a COMMENT, this returns false but leaves isPutback set with that
	// comment token in `putback` — relies on callers checking HasMore()
	// before reading.
	while (in && putback.type == Token::COMMENT) {
		putback = ReadToken();
		ScanSpace();
	}
	return putback.type != Token::COMMENT;
}
29
30 void Tokenizer::ScanSpace() {
31         std::istream::char_type c;
32         in.get(c);
33         while (in && std::isspace(c)) {
34                 if (c == '\n') {
35                         ++line;
36                 }
37                 in.get(c);
38         }
39         if (in) {
40                 in.putback(c);
41         }
42 }
43
44 void Tokenizer::Putback(const Token &t) {
45         if (isPutback) {
46                 throw LexerError(line, "Tokenizer: double putback not supported");
47         } else {
48                 putback = t;
49                 isPutback = true;
50         }
51 }
52
53 const Tokenizer::Token &Tokenizer::Peek() {
54         if (!isPutback) {
55                 putback = GetNext();
56                 isPutback = true;
57         }
58         return putback;
59 }
60
61 Tokenizer::Token Tokenizer::GetNext() {
62         if (!HasMore()) {
63                 throw LexerError(line, "read beyond last token");
64         }
65         if (isPutback) {
66                 isPutback = false;
67                 return putback;
68         } else {
69                 return ReadToken();
70         }
71 }
72
73 Tokenizer::Token Tokenizer::ReadToken() {
74         ScanSpace();
75         std::istream::char_type c;
76         in.get(c);
77         switch (c) {
78                 case Token::ANGLE_BRACKET_OPEN:
79                 case Token::ANGLE_BRACKET_CLOSE:
80                 case Token::CHEVRON_OPEN:
81                 case Token::CHEVRON_CLOSE:
82                 case Token::COLON:
83                 case Token::COMMA:
84                 case Token::BRACKET_OPEN:
85                 case Token::BRACKET_CLOSE:
86                 case Token::PARENTHESIS_OPEN:
87                 case Token::PARENTHESIS_CLOSE:
88                         return Token ((Token::Type) c);
89                 case '+': case '-':
90                 case '0': case '1': case '2': case '3': case '4':
91                 case '5': case '6': case '7': case '8': case '9':
92                         in.putback(c);
93                         return ReadNumber();
94                 case '"':
95                         in.putback(c);
96                         return ReadString();
97                 case '/':
98                         {
99                                 std::istream::char_type c2;
100                                 in.get(c2);
101                                 if (c2 == '/') {
102                                         return ReadComment();
103                                 } else if (c2 == '*') {
104                                         return ReadMultilineComment();
105                                 } else {
106                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *");
107                                 }
108                         }
109                         break;
110                 default:
111                         in.putback(c);
112                         {
113                                 Token t(ReadIdentifier());
114                                 if (std::isupper(c)) {
115                                         t.type = Token::TYPE_NAME;
116                                 } else if (std::islower(c)) {
117                                         CheckKeyword(t);
118                                 } else {
119                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c);
120                                 }
121                                 return t;
122                         }
123         }
124 }
125
126 Tokenizer::Token Tokenizer::ReadNumber() {
127         Token t(Token::NUMBER);
128         bool isNegative(false);
129
130         std::istream::char_type c;
131         in.get(c);
132         if (c == '-') {
133                 isNegative = true;
134         } else if (c != '+') {
135                 in.putback(c);
136         }
137
138         while (in.get(c)) {
139                 if (!std::isdigit(c)) {
140                         in.putback(c);
141                         break;
142                 }
143                 t.number *= 10;
144                 t.number += c - '0';
145         }
146
147         if (isNegative) t.number *= -1;
148
149         return t;
150 }
151
152 Tokenizer::Token Tokenizer::ReadString() {
153         Token t(Token::STRING);
154         bool escape(false);
155
156         std::istream::char_type c;
157         in.get(c);
158         if (c != '"') {
159                 throw LexerError(line, "Tokenizer: strings must begin with '\"'");
160         }
161
162         while (in.get(c)) {
163                 if (escape) {
164                         escape = false;
165                         switch (c) {
166                                 case 'n':
167                                         t.str.push_back('\n');
168                                         break;
169                                 case 'r':
170                                         t.str.push_back('\r');
171                                         break;
172                                 case 't':
173                                         t.str.push_back('\t');
174                                         break;
175                                 default:
176                                         t.str.push_back(c);
177                                         break;
178                         }
179                 } else if (c == '"') {
180                         break;
181                 } else if (c == '\\') {
182                         escape = true;
183                 } else {
184                         t.str.push_back(c);
185                 }
186         }
187
188         return t;
189 }
190
191 Tokenizer::Token Tokenizer::ReadIdentifier() {
192         Token t(Token::IDENTIFIER);
193
194         std::istream::char_type c;
195         while (in.get(c)) {
196                 if (std::isalnum(c) || c == '_') {
197                         t.str.push_back(c);
198                 } else {
199                         in.putback(c);
200                         break;
201                 }
202         }
203
204         return t;
205 }
206
207 Tokenizer::Token Tokenizer::ReadComment() {
208         std::istream::char_type c;
209         while (in.get(c) && c != '\n');
210         ++line;
211         return Token(Token::COMMENT);
212 }
213
214 Tokenizer::Token Tokenizer::ReadMultilineComment() {
215         std::istream::char_type c;
216         while (in.get(c)) {
217                 if (c == '*') {
218                         std::istream::char_type c2;
219                         if (in.get(c2) && c2 == '/') {
220                                 break;
221                         }
222                 } else if (c == '\n') {
223                         ++line;
224                 }
225         }
226         return Token(Token::COMMENT);
227 }
228
229 bool Tokenizer::CheckKeyword(Token &t) {
230         if (t.str == "export") {
231                 t.type = Token::KEYWORD_EXPORT;
232                 return true;
233         } else if (t.str == "false") {
234                 t.type = Token::KEYWORD_FALSE;
235                 return true;
236         } else if (t.str == "include") {
237                 t.type = Token::KEYWORD_INCLUDE;
238                 return true;
239         } else if (t.str == "true") {
240                 t.type = Token::KEYWORD_TRUE;
241                 return true;
242         } else {
243                 return false;
244         }
245 }
246
247 }