#include "Tokenizer.h"
#include <cctype>
#include <istream>
-#include <stdexcept>
namespace loader {
bool Tokenizer::HasMore() {
- return in;
+ if (isPutback) return true;
+ ScanSpace();
+ if (!in) return false;
+
+ putback = ReadToken();
+ isPutback = true;
+ if (!skipComments || putback.type != Token::COMMENT) return true;
+
+ while (in && putback.type == Token::COMMENT) {
+ putback = ReadToken();
+ ScanSpace();
+ }
+ return putback.type != Token::COMMENT;
+}
+
+void Tokenizer::ScanSpace() {
+ std::istream::char_type c;
+ in.get(c);
+ while (in && std::isspace(c)) {
+ if (c == '\n') {
+ ++line;
+ }
+ in.get(c);
+ }
+ if (in) {
+ in.putback(c);
+ }
}
void Tokenizer::Putback(const Token &t) {
if (isPutback) {
- throw std::runtime_error("Tokenizer: double putback not supported");
+ throw LexerError(line, "Tokenizer: double putback not supported");
} else {
putback = t;
isPutback = true;
}
}
+const Tokenizer::Token &Tokenizer::Peek() {
+ if (!isPutback) {
+ putback = GetNext();
+ isPutback = true;
+ }
+ return putback;
+}
+
// Returns the next token, consuming it; throws past end of input.
Tokenizer::Token Tokenizer::GetNext() {
+ if (!HasMore()) {
+ throw LexerError(line, "read beyond last token");
+ }
// Serve a token buffered by Putback()/Peek()/HasMore() first.
if (isPutback) {
isPutback = false;
return putback;
}
// NOTE(review): this patch hunk is truncated — the rest of GetNext
// (presumably "return ReadToken();" and the closing brace) is missing.
// Confirm against the full file before merging.
// Reads one raw token from the stream; dispatches on the first character.
Tokenizer::Token Tokenizer::ReadToken() {
+ ScanSpace();
std::istream::char_type c;
in.get(c);
- while (std::isspace(c)) in.get(c);
// Single-character punctuation tokens: the character value doubles as
// the Token::Type enumerator.
switch (c) {
case Token::ANGLE_BRACKET_OPEN:
case Token::ANGLE_BRACKET_CLOSE:
case Token::COMMA:
case Token::BRACKET_OPEN:
case Token::BRACKET_CLOSE:
+ case Token::PARENTHESIS_OPEN:
+ case Token::PARENTHESIS_CLOSE:
return Token ((Token::Type) c);
case '+': case '-':
case '0': case '1': case '2': case '3': case '4':
// NOTE(review): hunk context missing here — the digit/sign cases
// presumably fall through to a ReadNumber() call (and cases '5'-'9')
// that this patch fragment does not show. Confirm against the full file.
case '"':
in.putback(c);
return ReadString();
// Comments: "//" line comments and "/*...*/" block comments.
+ case '/':
+ {
+ std::istream::char_type c2;
+ in.get(c2);
+ if (c2 == '/') {
+ return ReadComment();
+ } else if (c2 == '*') {
+ return ReadMultilineComment();
+ } else {
+ throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c + c2 + ": expected / or *");
+ }
+ }
+ break;
default:
in.putback(c);
{
// NOTE(review): hunk context missing here — the lines declaring token
// `t` and the branch preceding "else if" (presumably uppercase /
// identifier handling) are not in this patch fragment.
} else if (std::islower(c)) {
CheckKeyword(t);
} else {
- throw std::runtime_error(std::string("Tokenizer: cannot parse token: ") + c);
+ throw LexerError(line, std::string("Tokenizer: cannot parse token: ") + c);
}
return t;
}
// NOTE(review): this appears to be the body of Tokenizer::ReadString(),
// but the function header and most of the loop body are missing from this
// patch fragment — confirm against the full file.
std::istream::char_type c;
in.get(c);
if (c != '"') {
- throw std::runtime_error("Tokenizer: strings must begin with '\"'");
+ throw LexerError(line, "Tokenizer: strings must begin with '\"'");
}
// Consume characters up to the closing quote (loop body not visible here).
while (in.get(c)) {
return t;
}
+Tokenizer::Token Tokenizer::ReadComment() {
+ std::istream::char_type c;
+ while (in.get(c) && c != '\n');
+ ++line;
+ return Token(Token::COMMENT);
+}
+
+Tokenizer::Token Tokenizer::ReadMultilineComment() {
+ std::istream::char_type c;
+ while (in.get(c)) {
+ if (c == '*') {
+ std::istream::char_type c2;
+ if (in.get(c2) && c2 == '/') {
+ break;
+ }
+ } else if (c == '\n') {
+ ++line;
+ }
+ }
+ return Token(Token::COMMENT);
+}
+
// Rewrites an identifier token's type when its text matches a keyword.
// NOTE(review): this definition runs past the visible end of the patch
// fragment — only the "export" case is shown here.
bool Tokenizer::CheckKeyword(Token &t) {
if (t.str == "export") {
t.type = Token::KEYWORD_EXPORT;