diff options
| author | Dylan <boss@tehbox.org> | 2026-06-06 19:36:41 +1200 |
|---|---|---|
| committer | Dylan <boss@tehbox.org> | 2026-06-06 19:36:41 +1200 |
| commit | 1d379a5cf34475f66f2ab9359f77dac162c0a40e (patch) | |
| tree | 395d54815331fbcad053001bf5cb28fafb69e9d4 /src | |
| parent | 46c896bcd78d31130321562b0659e28230261b8e (diff) | |
| download | tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.tar.gz tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.zip | |
feat: JSON reading
- Implemented a tokenizer for JSON
- Implemented a method that reads JSON from a string using the tokenizer
Diffstat (limited to 'src')
| -rw-r--r-- | src/json.cpp | 61 | ||||
| -rw-r--r-- | src/json.h | 22 | ||||
| -rw-r--r-- | src/tokenizer.cpp | 95 | ||||
| -rw-r--r-- | src/tokenizer.h | 36 |
4 files changed, 212 insertions, 2 deletions
diff --git a/src/json.cpp b/src/json.cpp index c11ca6e..9c1c3f9 100644 --- a/src/json.cpp +++ b/src/json.cpp @@ -1,7 +1,10 @@ #include "json.h" +#include "tokenizer.h" #include <cstddef> +#include <iostream> #include <stdexcept> +#include <string> namespace TehJSON { @@ -66,4 +69,62 @@ namespace TehJSON throw std::runtime_error("Node is a leaf!"); return children.size(); } + + Token JSON::consume() + { + if(tokenPos >= tokens.size()) + throw std::out_of_range("No tokens left, but json not finished!"); + return tokens[tokenPos++]; + } + + Token JSON::consume(TokenType type) + { + Token t = consume(); + if(t.type != type) + throw std::runtime_error("Wrong token type, expected: " + getTokenName(type) + ", but got: " + getTokenName(t.type)); + return t; + } + + TokenType JSON::nextTokenType() + { + if(tokenPos >= tokens.size()) + throw std::out_of_range("No tokens left, but json not finished!"); + return tokens[tokenPos].type; + } + + void JSON::readFromString(std::string s) + { + Tokenizer tokenizer; + tokenizer.appendInput(s); + std::vector<Token> stringTokens = tokenizer.tokenize(); + + readFromTokens(stringTokens, 0); + } + + int JSON::readFromTokens(std::vector<Token> tokens, int pos) + { + this->tokens = tokens; + tokenPos = pos; + + consume(TokenType::LBrace); + while(nextTokenType() != TokenType::RBrace) + { + Token childName = consume(TokenType::StringLit); + // std::cout << "Child: " << childName.content << std::endl; + consume(TokenType::Colon); + switch(nextTokenType()) + { + case TokenType::LBrace: tokenPos = children[childName.content].readFromTokens(tokens, tokenPos); break; + case TokenType::StringLit: children[childName.content].set<std::string>(consume(TokenType::StringLit).content); break; + case TokenType::IntLit: children[childName.content].set<int>(std::stoi(consume(TokenType::IntLit).content)); break; + case TokenType::FloatLit: children[childName.content].set<float>(std::stof(consume(TokenType::FloatLit).content)); break; + default: throw 
std::runtime_error("Token type is not a literal!"); + } + if(nextTokenType() != TokenType::RBrace) + consume(TokenType::Comma); + } + consume(TokenType::RBrace); + + return tokenPos; + } } @@ -1,7 +1,12 @@ +#pragma once + +#include "tokenizer.h" + #include <cstddef> #include <map> #include <memory> #include <string> +#include <vector> namespace TehJSON { @@ -12,14 +17,19 @@ namespace TehJSON JSON(const JSON& other) = default; ~JSON(); + // Writing methods + std::string getSerialized(); + std::string _getSerialized(int currIndent); + + // Reading methods + void readFromString(std::string s); + // Leaf methods template <typename T> T& get(); template <typename T> void set(T value); std::string leafType(); - std::string getSerialized(); - std::string _getSerialized(int currIndent); template <typename T> static std::string serializeData(std::shared_ptr<void> data); @@ -30,6 +40,14 @@ namespace TehJSON private: bool isLeaf = false; + // Reading data fields + std::vector<Token> tokens; + int tokenPos = 0; + Token consume(); + Token consume(TokenType type); + TokenType nextTokenType(); + int readFromTokens(std::vector<Token> tokens, int pos); + // Leaf data fields std::shared_ptr<void> data; std::string (*dataSerializer)(std::shared_ptr<void>); diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..ac7c215 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,95 @@ +#include "tokenizer.h" +#include <stdexcept> + +namespace TehJSON +{ + std::string getTokenName(TokenType t) + { + switch(t) + { + case TokenType::LBrace: return "LBrace"; + case TokenType::RBrace: return "RBrace"; + case TokenType::Colon: return "Colon"; + case TokenType::Comma: return "Comma"; + case TokenType::StringLit: return "StringLit"; + case TokenType::IntLit: return "IntLit"; + case TokenType::FloatLit: return "FloatLit"; + } + } + + void Tokenizer::appendInput(std::string s) + { + input += s; + } + + std::vector<Token> Tokenizer::tokenize() + { + std::vector<Token> tokens; 
+ + int pos = 0; + while(pos < input.size()) + { + char c = input.at(pos); + switch(c) + { + case '\t': + case '\n': + case ' ': break; + case '"': + { + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + std::string literalContent = ""; + c = input.at(pos); + while(c != '"') + { + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + c = input.at(pos); + } + tokens.push_back({TokenType::StringLit, literalContent}); + break; + } + case '0' ... '9': { + std::string literalContent{c}; + pos++; + c = input.at(pos); + bool isInt = true; + while((c >= '0' && c <= '9') || (c == '.')) + { + if(c == '.') + { + if(!isInt) + throw std::runtime_error("Cannot have multiple decimal places in float"); + isInt = false; + } + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("Number literal never ends!"); + c = input.at(pos); + } + pos--; + tokens.push_back({isInt?TokenType::IntLit:TokenType::FloatLit, literalContent}); + break; + } + case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); break; + case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); break; + case ':': tokens.push_back({TokenType::Colon, std::string{c}}); break; + case ',': tokens.push_back({TokenType::Comma, std::string{c}}); break; + default: throw std::runtime_error("Unexpected character: '" + std::string{c} + "'"); + } + pos++; + } + + return tokens; + } + + std::string Tokenizer::getInput() + { + return input; + } +} diff --git a/src/tokenizer.h b/src/tokenizer.h new file mode 100644 index 0000000..3322553 --- /dev/null +++ b/src/tokenizer.h @@ -0,0 +1,36 @@ +#pragma once + +#include <string> +#include <vector> + +namespace TehJSON +{ + enum struct TokenType + { + LBrace, + RBrace, + Colon, + Comma, + StringLit, + IntLit, + FloatLit, + }; + + std::string getTokenName(TokenType t); + + struct Token + { + TokenType type; + 
std::string content; + }; + + class Tokenizer + { + public: + void appendInput(std::string s); + std::vector<Token> tokenize(); + std::string getInput(); + private: + std::string input = ""; + }; +} |
