diff options
| -rw-r--r-- | GNUmakefile | 5 | ||||
| -rw-r--r-- | src/json.cpp | 61 | ||||
| -rw-r--r-- | src/json.h | 22 | ||||
| -rw-r--r-- | src/tokenizer.cpp | 95 | ||||
| -rw-r--r-- | src/tokenizer.h | 36 | ||||
| -rw-r--r-- | test/main.cpp | 44 |
6 files changed, 239 insertions, 24 deletions
diff --git a/GNUmakefile b/GNUmakefile index b8aab00..c4383d0 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -48,7 +48,7 @@ $(OBJS_DIR)/%.o: $(TEST_SRC_DIR)/%.cpp $(TEST_SOURCE) $(TEST_HEADERS) # Phony -.PHONY: clean +.PHONY: clean test clean: rm -f $(LIB) rm -f $(OBJS_DIR)/*.o @@ -56,4 +56,5 @@ clean: test: $(TEST) -$(OBJS_DIR)/json.o: $(SOURCE_DIR)/json.hpp +$(OBJS_DIR)/json.o: $(SOURCE_DIR)/json.hpp $(SOURCE_DIR)/tokenizer.h +$(OBJS_DIR)/tokenizer.o: diff --git a/src/json.cpp b/src/json.cpp index c11ca6e..9c1c3f9 100644 --- a/src/json.cpp +++ b/src/json.cpp @@ -1,7 +1,10 @@ #include "json.h" +#include "tokenizer.h" #include <cstddef> +#include <iostream> #include <stdexcept> +#include <string> namespace TehJSON { @@ -66,4 +69,62 @@ namespace TehJSON throw std::runtime_error("Node is a leaf!"); return children.size(); } + + Token JSON::consume() + { + if(tokenPos >= tokens.size()) + throw std::out_of_range("No tokens left, but json not finished!"); + return tokens[tokenPos++]; + } + + Token JSON::consume(TokenType type) + { + Token t = consume(); + if(t.type != type) + throw std::runtime_error("Wrong token type, expected: " + getTokenName(type) + ", but got: " + getTokenName(t.type)); + return t; + } + + TokenType JSON::nextTokenType() + { + if(tokenPos >= tokens.size()) + throw std::out_of_range("No tokens left, but json not finished!"); + return tokens[tokenPos].type; + } + + void JSON::readFromString(std::string s) + { + Tokenizer tokenizer; + tokenizer.appendInput(s); + std::vector<Token> stringTokens = tokenizer.tokenize(); + + readFromTokens(stringTokens, 0); + } + + int JSON::readFromTokens(std::vector<Token> tokens, int pos) + { + this->tokens = tokens; + tokenPos = pos; + + consume(TokenType::LBrace); + while(nextTokenType() != TokenType::RBrace) + { + Token childName = consume(TokenType::StringLit); + // std::cout << "Child: " << childName.content << std::endl; + consume(TokenType::Colon); + switch(nextTokenType()) + { + case TokenType::LBrace: tokenPos = children[childName.content].readFromTokens(tokens, tokenPos); break; + case TokenType::StringLit: children[childName.content].set<std::string>(consume(TokenType::StringLit).content); break; + case TokenType::IntLit: children[childName.content].set<int>(std::stoi(consume(TokenType::IntLit).content)); break; + case TokenType::FloatLit: children[childName.content].set<float>(std::stof(consume(TokenType::FloatLit).content)); break; + default: throw std::runtime_error("Token type is not a literal!"); + } + if(nextTokenType() != TokenType::RBrace) + consume(TokenType::Comma); + } + consume(TokenType::RBrace); + + return tokenPos; + } } @@ -1,7 +1,12 @@ +#pragma once + +#include "tokenizer.h" + #include <cstddef> #include <map> #include <memory> #include <string> +#include <vector> namespace TehJSON { @@ -12,14 +17,19 @@ namespace TehJSON JSON(const JSON& other) = default; ~JSON(); + // Writing methods + std::string getSerialized(); + std::string _getSerialized(int currIndent); + + // Reading methods + void readFromString(std::string s); + // Leaf methods template <typename T> T& get(); template <typename T> void set(T value); std::string leafType(); - std::string getSerialized(); - std::string _getSerialized(int currIndent); template <typename T> static std::string serializeData(std::shared_ptr<void> data); @@ -30,6 +40,14 @@ namespace TehJSON private: bool isLeaf = false; + // Reading data fields + std::vector<Token> tokens; + int tokenPos = 0; + Token consume(); + Token consume(TokenType type); + TokenType nextTokenType(); + int readFromTokens(std::vector<Token> tokens, int pos); + // Leaf data fields std::shared_ptr<void> data; std::string (*dataSerializer)(std::shared_ptr<void>); diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..ac7c215 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,95 @@ +#include "tokenizer.h" +#include <stdexcept> + +namespace TehJSON +{ + std::string getTokenName(TokenType t) + { + switch(t) + { + case TokenType::LBrace: return "LBrace"; + case TokenType::RBrace: return "RBrace"; + case TokenType::Colon: return "Colon"; + case TokenType::Comma: return "Comma"; + case TokenType::StringLit: return "StringLit"; + case TokenType::IntLit: return "IntLit"; + case TokenType::FloatLit: return "FloatLit"; + } + } + + void Tokenizer::appendInput(std::string s) + { + input += s; + } + + std::vector<Token> Tokenizer::tokenize() + { + std::vector<Token> tokens; + + int pos = 0; + while(pos < input.size()) + { + char c = input.at(pos); + switch(c) + { + case '\t': + case '\n': + case ' ': break; + case '"': + { + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + std::string literalContent = ""; + c = input.at(pos); + while(c != '"') + { + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + c = input.at(pos); + } + tokens.push_back({TokenType::StringLit, literalContent}); + break; + } + case '0' ... '9': { + std::string literalContent{c}; + pos++; + c = input.at(pos); + bool isInt = true; + while((c >= '0' && c <= '9') || (c == '.')) + { + if(c == '.') + { + if(!isInt) + throw std::runtime_error("Cannot have multiple decimal places in float"); + isInt = false; + } + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("Number literal never ends!"); + c = input.at(pos); + } + pos--; + tokens.push_back({isInt?TokenType::IntLit:TokenType::FloatLit, literalContent}); + break; + } + case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); break; + case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); break; + case ':': tokens.push_back({TokenType::Colon, std::string{c}}); break; + case ',': tokens.push_back({TokenType::Comma, std::string{c}}); break; + default: throw std::runtime_error("Unexpected character: '" + std::string{c} + "'"); + } + pos++; + } + + return tokens; + } + + std::string Tokenizer::getInput() + { + return input; + } +} diff --git a/src/tokenizer.h b/src/tokenizer.h new file mode 100644 index 0000000..3322553 --- /dev/null +++ b/src/tokenizer.h @@ -0,0 +1,36 @@ +#pragma once + +#include <string> +#include <vector> + +namespace TehJSON +{ + enum struct TokenType + { + LBrace, + RBrace, + Colon, + Comma, + StringLit, + IntLit, + FloatLit, + }; + + std::string getTokenName(TokenType t); + + struct Token + { + TokenType type; + std::string content; + }; + + class Tokenizer + { + public: + void appendInput(std::string s); + std::vector<Token> tokenize(); + std::string getInput(); + private: + std::string input = ""; + }; +} diff --git a/test/main.cpp b/test/main.cpp index 9430503..e86ec06 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -1,4 +1,5 @@ #include <json.hpp> +#include <tokenizer.h> #include <iostream> #include <memory> @@ -85,26 +86,29 @@ template <> std::string TehJSON::JSON::serializeData<std::string>(std::shared_pt int main() { - TehJSON::JSON json; + TehJSON::JSON jsonWriter; // TestClass test1; // json["test1"].set(test1); - json["test_string"].set<std::string>("stringy"); - json["test_int"].set<int>(123); - json["test_float"].set<float>(51.8); - json["test_object"]["test_int"].set<int>(100); - json["test_object"]["test_float"].set<float>(100); - - cout << json.getSerialized() << endl; - - // std::shared_ptr<void> test; - // test = std::make_shared<TestClass>(1); - // test = std::make_shared<TestClass>(2); - // test = std::make_shared<std::string>(std::string("test")); - // std::string& testRef = *(std::string*)test.get(); - // cout << *(std::string*)test.get() << endl; - // testRef += " test2"; - // cout << *(std::string*)test.get() << endl; - - - cout << "abc" << endl; + jsonWriter["test_string"].set<std::string>("stringy"); + jsonWriter["test_int"].set<int>(123); + jsonWriter["test_float"].set<float>(51.8); + jsonWriter["test_object"]["test_int"].set<int>(100); + jsonWriter["test_object"]["test_float"].set<float>(100); + + std::string jsonString = jsonWriter.getSerialized(); + cout << jsonString << endl; + + // TehJSON::Tokenizer tokenizer; + // tokenizer.appendInput(jsonString); + // std::vector<TehJSON::Token> tokens = tokenizer.tokenize(); + + // for(const auto& token : tokens) + // { + // cout << TehJSON::getTokenName(token.type) << ": " << token.content << endl; + // } + + TehJSON::JSON jsonReader; + cout << "Reading: " << endl; + jsonReader.readFromString(jsonString); + cout << jsonReader.getSerialized() << endl; } |
