]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.cpp
made tokenizer and token constructors explicit
[l2e.git] / src / loader / Tokenizer.cpp
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
#include "Tokenizer.h"

#include <cctype>
#include <istream>
#include <stdexcept>
#include <string>
12
13 namespace loader {
14
15 bool Tokenizer::HasMore() {
16         return in;
17 }
18
19 void Tokenizer::Putback(const Token &t) {
20         if (isPutback) {
21                 throw std::runtime_error("Tokenizer: double putback not supported");
22         } else {
23                 putback = t;
24                 isPutback = true;
25         }
26 }
27
28 Tokenizer::Token Tokenizer::GetNext() {
29         if (isPutback) {
30                 isPutback = false;
31                 return putback;
32         } else {
33                 return ReadToken();
34         }
35 }
36
37 Tokenizer::Token Tokenizer::ReadToken() {
38         std::istream::char_type c;
39         in.get(c);
40         while (std::isspace(c)) in.get(c);
41         switch (c) {
42                 case Token::ANGLE_BRACKET_OPEN:
43                 case Token::ANGLE_BRACKET_CLOSE:
44                 case Token::CHEVRON_OPEN:
45                 case Token::CHEVRON_CLOSE:
46                 case Token::COLON:
47                 case Token::COMMA:
48                 case Token::BRACKET_OPEN:
49                 case Token::BRACKET_CLOSE:
50                         return Token ((Token::Type) c);
51                 case '+': case '-':
52                 case '0': case '1': case '2': case '3': case '4':
53                 case '5': case '6': case '7': case '8': case '9':
54                         in.putback(c);
55                         return ReadNumber();
56                 case '"':
57                         in.putback(c);
58                         return ReadString();
59                 default:
60                         in.putback(c);
61                         {
62                                 Token t(ReadIdentifier());
63                                 if (std::isupper(c)) {
64                                         t.type = Token::TYPE_NAME;
65                                 } else if (std::islower(c)) {
66                                         CheckKeyword(t);
67                                 } else {
68                                         throw std::runtime_error(std::string("Tokenizer: cannot parse token: ") + c);
69                                 }
70                                 return t;
71                         }
72         }
73 }
74
75 Tokenizer::Token Tokenizer::ReadNumber() {
76         Token t(Token::NUMBER);
77         bool isNegative(false);
78
79         std::istream::char_type c;
80         in.get(c);
81         if (c == '-') {
82                 isNegative = true;
83         } else if (c != '+') {
84                 in.putback(c);
85         }
86
87         while (in.get(c)) {
88                 if (!std::isdigit(c)) {
89                         in.putback(c);
90                         break;
91                 }
92                 t.number *= 10;
93                 t.number += c - '0';
94         }
95
96         if (isNegative) t.number *= -1;
97
98         return t;
99 }
100
101 Tokenizer::Token Tokenizer::ReadString() {
102         Token t(Token::STRING);
103         bool escape(false);
104
105         std::istream::char_type c;
106         in.get(c);
107         if (c != '"') {
108                 throw std::runtime_error("Tokenizer: strings must begin with '\"'");
109         }
110
111         while (in.get(c)) {
112                 if (escape) {
113                         escape = false;
114                         switch (c) {
115                                 case 'n':
116                                         t.str.push_back('\n');
117                                         break;
118                                 case 'r':
119                                         t.str.push_back('\r');
120                                         break;
121                                 case 't':
122                                         t.str.push_back('\t');
123                                         break;
124                                 default:
125                                         t.str.push_back(c);
126                                         break;
127                         }
128                 } else if (c == '"') {
129                         break;
130                 } else if (c == '\\') {
131                         escape = true;
132                 } else {
133                         t.str.push_back(c);
134                 }
135         }
136
137         return t;
138 }
139
140 Tokenizer::Token Tokenizer::ReadIdentifier() {
141         Token t(Token::IDENTIFIER);
142
143         std::istream::char_type c;
144         while (in.get(c)) {
145                 if (std::isalnum(c) || c == '_') {
146                         t.str.push_back(c);
147                 } else {
148                         in.putback(c);
149                         break;
150                 }
151         }
152
153         return t;
154 }
155
156 bool Tokenizer::CheckKeyword(Token &t) {
157         if (t.str == "export") {
158                 t.type = Token::KEYWORD_EXPORT;
159                 return true;
160         } else if (t.str == "false") {
161                 t.type = Token::KEYWORD_FALSE;
162                 return true;
163         } else if (t.str == "include") {
164                 t.type = Token::KEYWORD_INCLUDE;
165                 return true;
166         } else if (t.str == "true") {
167                 t.type = Token::KEYWORD_TRUE;
168                 return true;
169         } else {
170                 return false;
171         }
172 }
173
174 }