From: Daniel Karbach Date: Sun, 26 Aug 2012 12:28:03 +0000 (+0200) Subject: added tokenizer X-Git-Url: https://git.localhorst.tv/?a=commitdiff_plain;h=93971b3ba23e6b51319b07d7d82dcbcf976f8b9a;p=l2e.git added tokenizer --- diff --git a/Debug/makefile b/Debug/makefile index 571f839..13878f3 100644 --- a/Debug/makefile +++ b/Debug/makefile @@ -9,6 +9,7 @@ RM := rm -rf # All of the sources participating in the build are defined here -include sources.mk -include src/sdl/subdir.mk +-include src/loader/subdir.mk -include src/graphics/subdir.mk -include src/common/subdir.mk -include src/battle/states/subdir.mk diff --git a/Debug/sources.mk b/Debug/sources.mk index a9593d6..7aaa6cf 100644 --- a/Debug/sources.mk +++ b/Debug/sources.mk @@ -25,6 +25,7 @@ C_UPPER_DEPS := SUBDIRS := \ src/sdl \ src \ +src/loader \ src/graphics \ src/common \ src/battle/states \ diff --git a/Debug/src/loader/subdir.mk b/Debug/src/loader/subdir.mk new file mode 100644 index 0000000..e91a415 --- /dev/null +++ b/Debug/src/loader/subdir.mk @@ -0,0 +1,24 @@ +################################################################################ +# Automatically-generated file. Do not edit! 
+################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +CPP_SRCS += \ +../src/loader/Tokenizer.cpp + +OBJS += \ +./src/loader/Tokenizer.o + +CPP_DEPS += \ +./src/loader/Tokenizer.d + + +# Each subdirectory must supply rules for building sources it contributes +src/loader/%.o: ../src/loader/%.cpp + @echo 'Building file: $<' + @echo 'Invoking: GCC C++ Compiler' + g++ -I/usr/include/SDL -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Release/makefile b/Release/makefile index 571f839..13878f3 100644 --- a/Release/makefile +++ b/Release/makefile @@ -9,6 +9,7 @@ RM := rm -rf # All of the sources participating in the build are defined here -include sources.mk -include src/sdl/subdir.mk +-include src/loader/subdir.mk -include src/graphics/subdir.mk -include src/common/subdir.mk -include src/battle/states/subdir.mk diff --git a/Release/sources.mk b/Release/sources.mk index a9593d6..7aaa6cf 100644 --- a/Release/sources.mk +++ b/Release/sources.mk @@ -25,6 +25,7 @@ C_UPPER_DEPS := SUBDIRS := \ src/sdl \ src \ +src/loader \ src/graphics \ src/common \ src/battle/states \ diff --git a/Release/src/loader/subdir.mk b/Release/src/loader/subdir.mk new file mode 100644 index 0000000..8c6e88c --- /dev/null +++ b/Release/src/loader/subdir.mk @@ -0,0 +1,24 @@ +################################################################################ +# Automatically-generated file. Do not edit! 
+################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +CPP_SRCS += \ +../src/loader/Tokenizer.cpp + +OBJS += \ +./src/loader/Tokenizer.o + +CPP_DEPS += \ +./src/loader/Tokenizer.d + + +# Each subdirectory must supply rules for building sources it contributes +src/loader/%.o: ../src/loader/%.cpp + @echo 'Building file: $<' + @echo 'Invoking: GCC C++ Compiler' + g++ -I/usr/include/SDL -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o "$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/src/loader/Tokenizer.cpp b/src/loader/Tokenizer.cpp new file mode 100644 index 0000000..5f74d74 --- /dev/null +++ b/src/loader/Tokenizer.cpp @@ -0,0 +1,174 @@ +/* + * Tokenizer.cpp + * + * Created on: Aug 26, 2012 + * Author: holy + */ + +#include "Tokenizer.h" + +#include <istream> +#include <stdexcept> + +namespace loader { + +bool Tokenizer::HasMore() { + return in; +} + +void Tokenizer::Putback(const Token &t) { + if (isPutback) { + throw std::runtime_error("Tokenizer: double putback not supported"); + } else { + putback = t; + isPutback = true; + } +} + +Tokenizer::Token Tokenizer::GetNext() { + if (isPutback) { + isPutback = false; + return putback; + } else { + return ReadToken(); + } +} + +Tokenizer::Token Tokenizer::ReadToken() { + std::istream::char_type c; + in.get(c); + while (std::isspace(c)) in.get(c); + switch (c) { + case Token::ANGLE_BRACKET_OPEN: + case Token::ANGLE_BRACKET_CLOSE: + case Token::CHEVRON_OPEN: + case Token::CHEVRON_CLOSE: + case Token::COLON: + case Token::COMMA: + case Token::BRACKET_OPEN: + case Token::BRACKET_CLOSE: + return (Token::Type) c; + case '+': case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + in.putback(c); + return ReadNumber(); + case '"': + in.putback(c); + return ReadString(); + default: + in.putback(c); + { + Token t(ReadIdentifier()); + if
(std::isupper(c)) { + t.type = Token::TYPE_NAME; + } else if (std::islower(c)) { + CheckKeyword(t); + } else { + throw std::runtime_error(std::string("Tokenizer: cannot parse token: ") + c); + } + return t; + } + } +} + +Tokenizer::Token Tokenizer::ReadNumber() { + Token t(Token::NUMBER); + bool isNegative(false); + + std::istream::char_type c; + in.get(c); + if (c == '-') { + isNegative = true; + } else if (c != '+') { + in.putback(c); + } + + while (in.get(c)) { + if (!std::isdigit(c)) { + in.putback(c); + break; + } + t.number *= 10; + t.number += c - '0'; + } + + if (isNegative) t.number *= -1; + + return t; +} + +Tokenizer::Token Tokenizer::ReadString() { + Token t(Token::STRING); + bool escape(false); + + std::istream::char_type c; + in.get(c); + if (c != '"') { + throw std::runtime_error("Tokenizer: strings must begin with '\"'"); + } + + while (in.get(c)) { + if (escape) { + escape = false; + switch (c) { + case 'n': + t.str.push_back('\n'); + break; + case 'r': + t.str.push_back('\r'); + break; + case 't': + t.str.push_back('\t'); + break; + default: + t.str.push_back(c); + break; + } + } else if (c == '"') { + break; + } else if (c == '\\') { + escape = true; + } else { + t.str.push_back(c); + } + } + + return t; +} + +Tokenizer::Token Tokenizer::ReadIdentifier() { + Token t(Token::IDENTIFIER); + + std::istream::char_type c; + while (in.get(c)) { + if (std::isalnum(c) || c == '_') { + t.str.push_back(c); + } else { + in.putback(c); + break; + } + } + + return t; +} + +bool Tokenizer::CheckKeyword(Token &t) { + if (t.str == "export") { + t.type = Token::KEYWORD_EXPORT; + return true; + } else if (t.str == "false") { + t.type = Token::KEYWORD_FALSE; + return true; + } else if (t.str == "include") { + t.type = Token::KEYWORD_INCLUDE; + return true; + } else if (t.str == "true") { + t.type = Token::KEYWORD_TRUE; + return true; + } else { + return false; + } +} + +} diff --git a/src/loader/Tokenizer.h b/src/loader/Tokenizer.h new file mode 100644 index 
0000000..f363af7 --- /dev/null +++ b/src/loader/Tokenizer.h @@ -0,0 +1,79 @@ +/* + * Tokenizer.h + * + * Created on: Aug 26, 2012 + * Author: holy + */ + +#ifndef LOADER_TOKENIZER_H_ +#define LOADER_TOKENIZER_H_ + +#include <iosfwd> +#include <string> + +namespace loader { + +class Tokenizer { + +public: + Tokenizer(std::istream &in) : in(in), isPutback(false) { } + ~Tokenizer() { } +private: + Tokenizer(const Tokenizer &); + Tokenizer &operator =(const Tokenizer &); + +public: + struct Token { + + enum Type { + UNKNOWN, + ANGLE_BRACKET_OPEN = '{', + ANGLE_BRACKET_CLOSE = '}', + CHEVRON_OPEN = '<', + CHEVRON_CLOSE = '>', + COLON = ':', + COMMA = ',', + BRACKET_OPEN = '[', + BRACKET_CLOSE = ']', + NUMBER, + STRING, + KEYWORD_EXPORT, + KEYWORD_FALSE, + KEYWORD_INCLUDE, + KEYWORD_TRUE, + IDENTIFIER, + TYPE_NAME, + }; + + Token() : type(UNKNOWN), number(0) { } + Token(Type t) : type(t), number(0) { } + + Type type; + std::string str; + int number; + + }; + + bool HasMore(); + Token GetNext(); + void Putback(const Token &); + +private: + Token ReadToken(); + + Token ReadNumber(); + Token ReadString(); + Token ReadIdentifier(); + + bool CheckKeyword(Token &); + +private: + std::istream &in; + Token putback; + bool isPutback; + +}; + +} + +#endif /* LOADER_TOKENIZER_H_ */