]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.cpp
added peek function to tokenizer
[l2e.git] / src / loader / Tokenizer.cpp
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
#include "Tokenizer.h"

#include <cctype>
#include <istream>
11
12 namespace loader {
13
14 bool Tokenizer::HasMore() {
15         return in;
16 }
17
18 void Tokenizer::Putback(const Token &t) {
19         if (isPutback) {
20                 throw LexerError("Tokenizer: double putback not supported");
21         } else {
22                 putback = t;
23                 isPutback = true;
24         }
25 }
26
27 const Tokenizer::Token &Tokenizer::Peek() {
28         if (!isPutback) {
29                 putback = GetNext();
30                 isPutback = true;
31         }
32         return putback;
33 }
34
35 Tokenizer::Token Tokenizer::GetNext() {
36         if (isPutback) {
37                 isPutback = false;
38                 return putback;
39         } else {
40                 return ReadToken();
41         }
42 }
43
44 Tokenizer::Token Tokenizer::ReadToken() {
45         std::istream::char_type c;
46         in.get(c);
47         while (std::isspace(c)) in.get(c);
48         switch (c) {
49                 case Token::ANGLE_BRACKET_OPEN:
50                 case Token::ANGLE_BRACKET_CLOSE:
51                 case Token::CHEVRON_OPEN:
52                 case Token::CHEVRON_CLOSE:
53                 case Token::COLON:
54                 case Token::COMMA:
55                 case Token::BRACKET_OPEN:
56                 case Token::BRACKET_CLOSE:
57                 case Token::PARENTHESIS_OPEN:
58                 case Token::PARENTHESIS_CLOSE:
59                         return Token ((Token::Type) c);
60                 case '+': case '-':
61                 case '0': case '1': case '2': case '3': case '4':
62                 case '5': case '6': case '7': case '8': case '9':
63                         in.putback(c);
64                         return ReadNumber();
65                 case '"':
66                         in.putback(c);
67                         return ReadString();
68                 default:
69                         in.putback(c);
70                         {
71                                 Token t(ReadIdentifier());
72                                 if (std::isupper(c)) {
73                                         t.type = Token::TYPE_NAME;
74                                 } else if (std::islower(c)) {
75                                         CheckKeyword(t);
76                                 } else {
77                                         throw LexerError(std::string("Tokenizer: cannot parse token: ") + c);
78                                 }
79                                 return t;
80                         }
81         }
82 }
83
84 Tokenizer::Token Tokenizer::ReadNumber() {
85         Token t(Token::NUMBER);
86         bool isNegative(false);
87
88         std::istream::char_type c;
89         in.get(c);
90         if (c == '-') {
91                 isNegative = true;
92         } else if (c != '+') {
93                 in.putback(c);
94         }
95
96         while (in.get(c)) {
97                 if (!std::isdigit(c)) {
98                         in.putback(c);
99                         break;
100                 }
101                 t.number *= 10;
102                 t.number += c - '0';
103         }
104
105         if (isNegative) t.number *= -1;
106
107         return t;
108 }
109
110 Tokenizer::Token Tokenizer::ReadString() {
111         Token t(Token::STRING);
112         bool escape(false);
113
114         std::istream::char_type c;
115         in.get(c);
116         if (c != '"') {
117                 throw LexerError("Tokenizer: strings must begin with '\"'");
118         }
119
120         while (in.get(c)) {
121                 if (escape) {
122                         escape = false;
123                         switch (c) {
124                                 case 'n':
125                                         t.str.push_back('\n');
126                                         break;
127                                 case 'r':
128                                         t.str.push_back('\r');
129                                         break;
130                                 case 't':
131                                         t.str.push_back('\t');
132                                         break;
133                                 default:
134                                         t.str.push_back(c);
135                                         break;
136                         }
137                 } else if (c == '"') {
138                         break;
139                 } else if (c == '\\') {
140                         escape = true;
141                 } else {
142                         t.str.push_back(c);
143                 }
144         }
145
146         return t;
147 }
148
149 Tokenizer::Token Tokenizer::ReadIdentifier() {
150         Token t(Token::IDENTIFIER);
151
152         std::istream::char_type c;
153         while (in.get(c)) {
154                 if (std::isalnum(c) || c == '_') {
155                         t.str.push_back(c);
156                 } else {
157                         in.putback(c);
158                         break;
159                 }
160         }
161
162         return t;
163 }
164
165 bool Tokenizer::CheckKeyword(Token &t) {
166         if (t.str == "export") {
167                 t.type = Token::KEYWORD_EXPORT;
168                 return true;
169         } else if (t.str == "false") {
170                 t.type = Token::KEYWORD_FALSE;
171                 return true;
172         } else if (t.str == "include") {
173                 t.type = Token::KEYWORD_INCLUDE;
174                 return true;
175         } else if (t.str == "true") {
176                 t.type = Token::KEYWORD_TRUE;
177                 return true;
178         } else {
179                 return false;
180         }
181 }
182
183 }