]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.cpp
made parsing exceptions a little more informative
[l2e.git] / src / loader / Tokenizer.cpp
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
#include "Tokenizer.h"

#include <cctype>
#include <istream>
11
12 namespace loader {
13
14 bool Tokenizer::HasMore() {
15         ScanSpace();
16         return in;
17 }
18
19 void Tokenizer::ScanSpace() {
20         std::istream::char_type c;
21         in.get(c);
22         while (in && std::isspace(c)) {
23                 if (c == '\n') {
24                         ++line;
25                 }
26                 in.get(c);
27         }
28         if (in) {
29                 in.putback(c);
30         }
31 }
32
33 void Tokenizer::Putback(const Token &t) {
34         if (isPutback) {
35                 throw LexerError(line, "Tokenizer: double putback not supported");
36         } else {
37                 putback = t;
38                 isPutback = true;
39         }
40 }
41
42 const Tokenizer::Token &Tokenizer::Peek() {
43         if (!isPutback) {
44                 putback = GetNext();
45                 isPutback = true;
46         }
47         return putback;
48 }
49
50 Tokenizer::Token Tokenizer::GetNext() {
51         if (isPutback) {
52                 isPutback = false;
53                 return putback;
54         } else {
55                 return ReadToken();
56         }
57 }
58
59 Tokenizer::Token Tokenizer::ReadToken() {
60         ScanSpace();
61         std::istream::char_type c;
62         in.get(c);
63         switch (c) {
64                 case Token::ANGLE_BRACKET_OPEN:
65                 case Token::ANGLE_BRACKET_CLOSE:
66                 case Token::CHEVRON_OPEN:
67                 case Token::CHEVRON_CLOSE:
68                 case Token::COLON:
69                 case Token::COMMA:
70                 case Token::BRACKET_OPEN:
71                 case Token::BRACKET_CLOSE:
72                 case Token::PARENTHESIS_OPEN:
73                 case Token::PARENTHESIS_CLOSE:
74                         return Token ((Token::Type) c);
75                 case '+': case '-':
76                 case '0': case '1': case '2': case '3': case '4':
77                 case '5': case '6': case '7': case '8': case '9':
78                         in.putback(c);
79                         return ReadNumber();
80                 case '"':
81                         in.putback(c);
82                         return ReadString();
83                 default:
84                         in.putback(c);
85                         {
86                                 Token t(ReadIdentifier());
87                                 if (std::isupper(c)) {
88                                         t.type = Token::TYPE_NAME;
89                                 } else if (std::islower(c)) {
90                                         CheckKeyword(t);
91                                 } else {
92                                         throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c);
93                                 }
94                                 return t;
95                         }
96         }
97 }
98
99 Tokenizer::Token Tokenizer::ReadNumber() {
100         Token t(Token::NUMBER);
101         bool isNegative(false);
102
103         std::istream::char_type c;
104         in.get(c);
105         if (c == '-') {
106                 isNegative = true;
107         } else if (c != '+') {
108                 in.putback(c);
109         }
110
111         while (in.get(c)) {
112                 if (!std::isdigit(c)) {
113                         in.putback(c);
114                         break;
115                 }
116                 t.number *= 10;
117                 t.number += c - '0';
118         }
119
120         if (isNegative) t.number *= -1;
121
122         return t;
123 }
124
125 Tokenizer::Token Tokenizer::ReadString() {
126         Token t(Token::STRING);
127         bool escape(false);
128
129         std::istream::char_type c;
130         in.get(c);
131         if (c != '"') {
132                 throw LexerError(line, "Tokenizer: strings must begin with '\"'");
133         }
134
135         while (in.get(c)) {
136                 if (escape) {
137                         escape = false;
138                         switch (c) {
139                                 case 'n':
140                                         t.str.push_back('\n');
141                                         break;
142                                 case 'r':
143                                         t.str.push_back('\r');
144                                         break;
145                                 case 't':
146                                         t.str.push_back('\t');
147                                         break;
148                                 default:
149                                         t.str.push_back(c);
150                                         break;
151                         }
152                 } else if (c == '"') {
153                         break;
154                 } else if (c == '\\') {
155                         escape = true;
156                 } else {
157                         t.str.push_back(c);
158                 }
159         }
160
161         return t;
162 }
163
164 Tokenizer::Token Tokenizer::ReadIdentifier() {
165         Token t(Token::IDENTIFIER);
166
167         std::istream::char_type c;
168         while (in.get(c)) {
169                 if (std::isalnum(c) || c == '_') {
170                         t.str.push_back(c);
171                 } else {
172                         in.putback(c);
173                         break;
174                 }
175         }
176
177         return t;
178 }
179
180 bool Tokenizer::CheckKeyword(Token &t) {
181         if (t.str == "export") {
182                 t.type = Token::KEYWORD_EXPORT;
183                 return true;
184         } else if (t.str == "false") {
185                 t.type = Token::KEYWORD_FALSE;
186                 return true;
187         } else if (t.str == "include") {
188                 t.type = Token::KEYWORD_INCLUDE;
189                 return true;
190         } else if (t.str == "true") {
191                 t.type = Token::KEYWORD_TRUE;
192                 return true;
193         } else {
194                 return false;
195         }
196 }
197
198 }