// Source: git.localhorst.tv — l2e.git / src / loader / Tokenizer.cpp
// Commit note: introduced exception type for tokenizer
1 /*
2  * Tokenizer.cpp
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
#include "Tokenizer.h"

#include <cctype>
#include <istream>
11
12 namespace loader {
13
14 bool Tokenizer::HasMore() {
15         return in;
16 }
17
18 void Tokenizer::Putback(const Token &t) {
19         if (isPutback) {
20                 throw LexerError("Tokenizer: double putback not supported");
21         } else {
22                 putback = t;
23                 isPutback = true;
24         }
25 }
26
27 Tokenizer::Token Tokenizer::GetNext() {
28         if (isPutback) {
29                 isPutback = false;
30                 return putback;
31         } else {
32                 return ReadToken();
33         }
34 }
35
36 Tokenizer::Token Tokenizer::ReadToken() {
37         std::istream::char_type c;
38         in.get(c);
39         while (std::isspace(c)) in.get(c);
40         switch (c) {
41                 case Token::ANGLE_BRACKET_OPEN:
42                 case Token::ANGLE_BRACKET_CLOSE:
43                 case Token::CHEVRON_OPEN:
44                 case Token::CHEVRON_CLOSE:
45                 case Token::COLON:
46                 case Token::COMMA:
47                 case Token::BRACKET_OPEN:
48                 case Token::BRACKET_CLOSE:
49                         return Token ((Token::Type) c);
50                 case '+': case '-':
51                 case '0': case '1': case '2': case '3': case '4':
52                 case '5': case '6': case '7': case '8': case '9':
53                         in.putback(c);
54                         return ReadNumber();
55                 case '"':
56                         in.putback(c);
57                         return ReadString();
58                 default:
59                         in.putback(c);
60                         {
61                                 Token t(ReadIdentifier());
62                                 if (std::isupper(c)) {
63                                         t.type = Token::TYPE_NAME;
64                                 } else if (std::islower(c)) {
65                                         CheckKeyword(t);
66                                 } else {
67                                         throw LexerError(std::string("Tokenizer: cannot parse token: ") + c);
68                                 }
69                                 return t;
70                         }
71         }
72 }
73
74 Tokenizer::Token Tokenizer::ReadNumber() {
75         Token t(Token::NUMBER);
76         bool isNegative(false);
77
78         std::istream::char_type c;
79         in.get(c);
80         if (c == '-') {
81                 isNegative = true;
82         } else if (c != '+') {
83                 in.putback(c);
84         }
85
86         while (in.get(c)) {
87                 if (!std::isdigit(c)) {
88                         in.putback(c);
89                         break;
90                 }
91                 t.number *= 10;
92                 t.number += c - '0';
93         }
94
95         if (isNegative) t.number *= -1;
96
97         return t;
98 }
99
100 Tokenizer::Token Tokenizer::ReadString() {
101         Token t(Token::STRING);
102         bool escape(false);
103
104         std::istream::char_type c;
105         in.get(c);
106         if (c != '"') {
107                 throw LexerError("Tokenizer: strings must begin with '\"'");
108         }
109
110         while (in.get(c)) {
111                 if (escape) {
112                         escape = false;
113                         switch (c) {
114                                 case 'n':
115                                         t.str.push_back('\n');
116                                         break;
117                                 case 'r':
118                                         t.str.push_back('\r');
119                                         break;
120                                 case 't':
121                                         t.str.push_back('\t');
122                                         break;
123                                 default:
124                                         t.str.push_back(c);
125                                         break;
126                         }
127                 } else if (c == '"') {
128                         break;
129                 } else if (c == '\\') {
130                         escape = true;
131                 } else {
132                         t.str.push_back(c);
133                 }
134         }
135
136         return t;
137 }
138
139 Tokenizer::Token Tokenizer::ReadIdentifier() {
140         Token t(Token::IDENTIFIER);
141
142         std::istream::char_type c;
143         while (in.get(c)) {
144                 if (std::isalnum(c) || c == '_') {
145                         t.str.push_back(c);
146                 } else {
147                         in.putback(c);
148                         break;
149                 }
150         }
151
152         return t;
153 }
154
155 bool Tokenizer::CheckKeyword(Token &t) {
156         if (t.str == "export") {
157                 t.type = Token::KEYWORD_EXPORT;
158                 return true;
159         } else if (t.str == "false") {
160                 t.type = Token::KEYWORD_FALSE;
161                 return true;
162         } else if (t.str == "include") {
163                 t.type = Token::KEYWORD_INCLUDE;
164                 return true;
165         } else if (t.str == "true") {
166                 t.type = Token::KEYWORD_TRUE;
167                 return true;
168         } else {
169                 return false;
170         }
171 }
172
173 }