]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.cpp
added missing parentheses tokens
[l2e.git] / src / loader / Tokenizer.cpp
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
#include "Tokenizer.h"

#include <cctype>
#include <istream>
#include <string>
11
12 namespace loader {
13
14 bool Tokenizer::HasMore() {
15         return in;
16 }
17
18 void Tokenizer::Putback(const Token &t) {
19         if (isPutback) {
20                 throw LexerError("Tokenizer: double putback not supported");
21         } else {
22                 putback = t;
23                 isPutback = true;
24         }
25 }
26
27 Tokenizer::Token Tokenizer::GetNext() {
28         if (isPutback) {
29                 isPutback = false;
30                 return putback;
31         } else {
32                 return ReadToken();
33         }
34 }
35
36 Tokenizer::Token Tokenizer::ReadToken() {
37         std::istream::char_type c;
38         in.get(c);
39         while (std::isspace(c)) in.get(c);
40         switch (c) {
41                 case Token::ANGLE_BRACKET_OPEN:
42                 case Token::ANGLE_BRACKET_CLOSE:
43                 case Token::CHEVRON_OPEN:
44                 case Token::CHEVRON_CLOSE:
45                 case Token::COLON:
46                 case Token::COMMA:
47                 case Token::BRACKET_OPEN:
48                 case Token::BRACKET_CLOSE:
49                 case Token::PARENTHESIS_OPEN:
50                 case Token::PARENTHESIS_CLOSE:
51                         return Token ((Token::Type) c);
52                 case '+': case '-':
53                 case '0': case '1': case '2': case '3': case '4':
54                 case '5': case '6': case '7': case '8': case '9':
55                         in.putback(c);
56                         return ReadNumber();
57                 case '"':
58                         in.putback(c);
59                         return ReadString();
60                 default:
61                         in.putback(c);
62                         {
63                                 Token t(ReadIdentifier());
64                                 if (std::isupper(c)) {
65                                         t.type = Token::TYPE_NAME;
66                                 } else if (std::islower(c)) {
67                                         CheckKeyword(t);
68                                 } else {
69                                         throw LexerError(std::string("Tokenizer: cannot parse token: ") + c);
70                                 }
71                                 return t;
72                         }
73         }
74 }
75
76 Tokenizer::Token Tokenizer::ReadNumber() {
77         Token t(Token::NUMBER);
78         bool isNegative(false);
79
80         std::istream::char_type c;
81         in.get(c);
82         if (c == '-') {
83                 isNegative = true;
84         } else if (c != '+') {
85                 in.putback(c);
86         }
87
88         while (in.get(c)) {
89                 if (!std::isdigit(c)) {
90                         in.putback(c);
91                         break;
92                 }
93                 t.number *= 10;
94                 t.number += c - '0';
95         }
96
97         if (isNegative) t.number *= -1;
98
99         return t;
100 }
101
102 Tokenizer::Token Tokenizer::ReadString() {
103         Token t(Token::STRING);
104         bool escape(false);
105
106         std::istream::char_type c;
107         in.get(c);
108         if (c != '"') {
109                 throw LexerError("Tokenizer: strings must begin with '\"'");
110         }
111
112         while (in.get(c)) {
113                 if (escape) {
114                         escape = false;
115                         switch (c) {
116                                 case 'n':
117                                         t.str.push_back('\n');
118                                         break;
119                                 case 'r':
120                                         t.str.push_back('\r');
121                                         break;
122                                 case 't':
123                                         t.str.push_back('\t');
124                                         break;
125                                 default:
126                                         t.str.push_back(c);
127                                         break;
128                         }
129                 } else if (c == '"') {
130                         break;
131                 } else if (c == '\\') {
132                         escape = true;
133                 } else {
134                         t.str.push_back(c);
135                 }
136         }
137
138         return t;
139 }
140
141 Tokenizer::Token Tokenizer::ReadIdentifier() {
142         Token t(Token::IDENTIFIER);
143
144         std::istream::char_type c;
145         while (in.get(c)) {
146                 if (std::isalnum(c) || c == '_') {
147                         t.str.push_back(c);
148                 } else {
149                         in.putback(c);
150                         break;
151                 }
152         }
153
154         return t;
155 }
156
157 bool Tokenizer::CheckKeyword(Token &t) {
158         if (t.str == "export") {
159                 t.type = Token::KEYWORD_EXPORT;
160                 return true;
161         } else if (t.str == "false") {
162                 t.type = Token::KEYWORD_FALSE;
163                 return true;
164         } else if (t.str == "include") {
165                 t.type = Token::KEYWORD_INCLUDE;
166                 return true;
167         } else if (t.str == "true") {
168                 t.type = Token::KEYWORD_TRUE;
169                 return true;
170         } else {
171                 return false;
172         }
173 }
174
175 }