]> git.localhorst.tv Git - l2e.git/blob - src/loader/Tokenizer.h
made parsing exceptions a little more informative
[l2e.git] / src / loader / Tokenizer.h
1 /*
2  * Tokenizer.h
3  *
4  *  Created on: Aug 26, 2012
5  *      Author: holy
6  */
7
8 #ifndef LOADER_TOKENIZER_H_
9 #define LOADER_TOKENIZER_H_
10
11 #include <iosfwd>
12 #include <ostream>
13 #include <stdexcept>
14 #include <string>
15
16 namespace loader {
17
/**
 * Turns the characters of an input stream into a sequence of Tokens.
 *
 * Only a reference to the stream is stored, so the stream passed to the
 * constructor has to stay alive for as long as the Tokenizer is used.
 * All non-inline member functions are defined outside of this header.
 */
class Tokenizer {

public:
	/// Binds the tokenizer to in; the line counter starts at 1 and no
	/// token is marked as put back.
	explicit Tokenizer(std::istream &in) : in(in), line(1), isPutback(false) { }
	~Tokenizer() { }
private:
	// Copy operations are private and have no body in this header, so
	// code outside of the class cannot copy a Tokenizer.
	Tokenizer(const Tokenizer &);
	Tokenizer &operator =(const Tokenizer &);

public:
	/// A single lexical unit: its type plus an optional string and number payload.
	struct Token {

		/// Kind of a token. Every enumerator except UNKNOWN has the value
		/// of a character literal; for punctuation this is the punctuation
		/// character itself.
		enum Type {
			UNKNOWN = 0,
			// NOTE: despite their names, the ANGLE_BRACKET_* enumerators
			// stand for curly braces; '<' and '>' are CHEVRON_*.
			ANGLE_BRACKET_OPEN = '{',
			ANGLE_BRACKET_CLOSE = '}',
			CHEVRON_OPEN = '<',
			CHEVRON_CLOSE = '>',
			COLON = ':',
			COMMA = ',',
			BRACKET_OPEN = '[',
			BRACKET_CLOSE = ']',
			PARENTHESIS_OPEN = '(',
			PARENTHESIS_CLOSE = ')',
			NUMBER = '0',
			STRING = '"',
			KEYWORD_EXPORT = 'e',
			KEYWORD_FALSE = 'f',
			KEYWORD_INCLUDE = 'i',
			KEYWORD_TRUE = 't',
			IDENTIFIER = 'x',
			// No trailing comma after the last enumerator: that is only
			// valid since C++11 and this header is otherwise plain C++03.
			TYPE_NAME = 'n'
		};

		/// Creates an UNKNOWN token with an empty string and number 0.
		Token() : type(UNKNOWN), number(0) { }
		/// Creates a token of type t with an empty string and number 0.
		explicit Token(Type t) : type(t), number(0) { }

		Type type;
		std::string str;
		int number;

	};

	/// Runtime error that additionally records a line number.
	class LexerError: public std::runtime_error {
	public:
		/// @param line line number to report through Line()
		/// @param msg  message forwarded to std::runtime_error
		LexerError(int line, const std::string &msg)
		: std::runtime_error(msg), line(line) { }
		/// Returns the line number given at construction.
		int Line() const { return line; }
	private:
		int line;
	};

	// Token stream interface (defined outside of this header).
	bool HasMore();
	Token GetNext();
	const Token &Peek();
	void Putback(const Token &);
	/// Returns the current value of the line counter.
	int Line() const { return line; }

private:
	// Scanning helpers (defined outside of this header).
	void ScanSpace();
	Token ReadToken();

	Token ReadNumber();
	Token ReadString();
	Token ReadIdentifier();

	bool CheckKeyword(Token &);

private:
	// Declaration order is kept as is; it determines the order in which
	// the members are initialized.
	std::istream &in;
	Token putback;
	int line;
	bool isPutback;

};
93
94 inline const char *TokenTypeToString(Tokenizer::Token::Type t) {
95         switch (t) {
96                 case Tokenizer::Token::ANGLE_BRACKET_OPEN:
97                         return "ANGLE_BRACKET_OPEN";
98                 case Tokenizer::Token::ANGLE_BRACKET_CLOSE:
99                         return "ANGLE_BRACKET_CLOSE";
100                 case Tokenizer::Token::CHEVRON_OPEN:
101                         return "CHEVRON_OPEN";
102                 case Tokenizer::Token::CHEVRON_CLOSE:
103                         return "CHEVRON_CLOSE";
104                 case Tokenizer::Token::COLON:
105                         return "COLON";
106                 case Tokenizer::Token::COMMA:
107                         return "COMMA";
108                 case Tokenizer::Token::BRACKET_OPEN:
109                         return "BRACKET_OPEN";
110                 case Tokenizer::Token::BRACKET_CLOSE:
111                         return "BRACKET_CLOSE";
112                 case Tokenizer::Token::PARENTHESIS_OPEN:
113                         return "PARENTHESIS_OPEN";
114                 case Tokenizer::Token::PARENTHESIS_CLOSE:
115                         return "PARENTHESIS_CLOSE";
116                 case Tokenizer::Token::NUMBER:
117                         return "NUMBER";
118                 case Tokenizer::Token::STRING:
119                         return "STRING";
120                 case Tokenizer::Token::KEYWORD_EXPORT:
121                         return "KEYWORD_EXPORT";
122                 case Tokenizer::Token::KEYWORD_FALSE:
123                         return "KEYWORD_FALSE";
124                 case Tokenizer::Token::KEYWORD_INCLUDE:
125                         return "KEYWORD_INCLUDE";
126                 case Tokenizer::Token::KEYWORD_TRUE:
127                         return "KEYWORD_TRUE";
128                 case Tokenizer::Token::IDENTIFIER:
129                         return "IDENTIFIER";
130                 case Tokenizer::Token::TYPE_NAME:
131                         return "TYPE_NAME";
132                 default:
133                         return "UNKNOWN";
134         }
135 }
136
137 inline std::ostream &operator <<(std::ostream &out, Tokenizer::Token::Type t) {
138         out << TokenTypeToString(t);
139         return out;
140 }
141
142 }
143
144 #endif /* LOADER_TOKENIZER_H_ */