summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDylan <boss@tehbox.org>2026-06-06 19:36:41 +1200
committerDylan <boss@tehbox.org>2026-06-06 19:36:41 +1200
commit1d379a5cf34475f66f2ab9359f77dac162c0a40e (patch)
tree395d54815331fbcad053001bf5cb28fafb69e9d4 /src
parent46c896bcd78d31130321562b0659e28230261b8e (diff)
downloadtehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.tar.gz
tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.zip
feat: JSON reading
- Implemented a tokenizer for JSON
- Implemented a method that reads JSON from a string using the tokenizer
Diffstat (limited to 'src')
-rw-r--r--src/json.cpp61
-rw-r--r--src/json.h22
-rw-r--r--src/tokenizer.cpp95
-rw-r--r--src/tokenizer.h36
4 files changed, 212 insertions, 2 deletions
diff --git a/src/json.cpp b/src/json.cpp
index c11ca6e..9c1c3f9 100644
--- a/src/json.cpp
+++ b/src/json.cpp
@@ -1,7 +1,10 @@
#include "json.h"
+#include "tokenizer.h"
#include <cstddef>
+#include <iostream>
#include <stdexcept>
+#include <string>
namespace TehJSON
{
@@ -66,4 +69,62 @@ namespace TehJSON
throw std::runtime_error("Node is a leaf!");
return children.size();
}
+
+ /// Returns the token at the current read position and advances past it.
+ /// Throws std::out_of_range when every token has already been consumed.
+ Token JSON::consume()
+ {
+     const bool exhausted = tokenPos >= tokens.size();
+     if(exhausted)
+         throw std::out_of_range("No tokens left, but json not finished!");
+
+     Token current = tokens[tokenPos];
+     ++tokenPos;
+     return current;
+ }
+
+ /// Consumes the next token and checks that it has the expected type.
+ /// Throws std::runtime_error naming both types on a mismatch; the offending
+ /// token has already been consumed by then.
+ Token JSON::consume(TokenType type)
+ {
+     Token next = consume();
+     if(next.type == type)
+         return next;
+
+     throw std::runtime_error("Wrong token type, expected: " + getTokenName(type) + ", but got: " + getTokenName(next.type));
+ }
+
+ /// Peeks at the type of the next token without consuming it.
+ /// Throws std::out_of_range when no tokens are left.
+ TokenType JSON::nextTokenType()
+ {
+     if(tokenPos < tokens.size())
+         return tokens[tokenPos].type;
+
+     throw std::out_of_range("No tokens left, but json not finished!");
+ }
+
+ /// Tokenizes `s` and populates this node from the resulting token stream,
+ /// starting at the first token.
+ void JSON::readFromString(std::string s)
+ {
+     Tokenizer lexer;
+     lexer.appendInput(s);
+
+     readFromTokens(lexer.tokenize(), 0);
+ }
+
+ /// Parses one JSON object (`{ "name": value, ... }`) from `tokens`, starting at
+ /// index `pos`, and stores each member in `children` under its name.
+ ///
+ /// A value may be a nested object, a string literal, an integer literal or a
+ /// float literal. A comma directly before the closing brace is accepted.
+ ///
+ /// @return the index of the first token after the object's closing brace.
+ /// @throws std::out_of_range if the tokens run out before the object is closed,
+ ///         or if a number literal does not fit the target type (std::stoi / std::stof).
+ /// @throws std::runtime_error if a token has an unexpected type or a value is
+ ///         neither an object nor a literal.
+ int JSON::readFromTokens(std::vector<Token> tokens, int pos)
+ {
+     // `tokens` is already a private copy, so take over its storage instead of
+     // copying it a second time. After the swap the parameter only holds whatever
+     // this node stored before, so everything below uses this->tokens.
+     this->tokens.swap(tokens);
+     tokenPos = pos;
+
+     consume(TokenType::LBrace);
+     while(nextTokenType() != TokenType::RBrace)
+     {
+         Token childName = consume(TokenType::StringLit);
+         consume(TokenType::Colon);
+         switch(nextTokenType())
+         {
+         // A nested object parses itself and reports where it stopped.
+         case TokenType::LBrace: tokenPos = children[childName.content].readFromTokens(this->tokens, tokenPos); break;
+         case TokenType::StringLit: children[childName.content].set<std::string>(consume(TokenType::StringLit).content); break;
+         case TokenType::IntLit: children[childName.content].set<int>(std::stoi(consume(TokenType::IntLit).content)); break;
+         case TokenType::FloatLit: children[childName.content].set<float>(std::stof(consume(TokenType::FloatLit).content)); break;
+         default: throw std::runtime_error("Token type is not a literal!");
+         }
+         // Members are comma separated; the last one is followed by the closing brace.
+         if(nextTokenType() != TokenType::RBrace)
+             consume(TokenType::Comma);
+     }
+     consume(TokenType::RBrace);
+
+     // The token buffer is only needed while parsing. Release it so that a parsed
+     // node does not keep (and copy along with itself) the whole token stream.
+     std::vector<Token>().swap(this->tokens);
+
+     return tokenPos;
+ }
}
diff --git a/src/json.h b/src/json.h
index d1d7938..efde542 100644
--- a/src/json.h
+++ b/src/json.h
@@ -1,7 +1,12 @@
+#pragma once
+
+#include "tokenizer.h"
+
#include <cstddef>
#include <map>
#include <memory>
#include <string>
+#include <vector>
namespace TehJSON
{
@@ -12,14 +17,19 @@ namespace TehJSON
JSON(const JSON& other) = default;
~JSON();
+ // Writing methods
+ std::string getSerialized();
+ std::string _getSerialized(int currIndent);
+
+ // Reading methods
+ void readFromString(std::string s); // tokenizes s and fills this node from the JSON object it describes
+
// Leaf methods
template <typename T>
T& get();
template <typename T>
void set(T value);
std::string leafType();
- std::string getSerialized();
- std::string _getSerialized(int currIndent);
template <typename T>
static std::string serializeData(std::shared_ptr<void> data);
@@ -30,6 +40,14 @@ namespace TehJSON
private:
bool isLeaf = false;
+ // Reading data fields (parser state used while reading from a token stream)
+ std::vector<Token> tokens; // token stream being parsed
+ int tokenPos = 0; // index into tokens of the next token to consume
+ Token consume(); // returns the next token and advances; throws std::out_of_range when none are left
+ Token consume(TokenType type); // like consume(), but throws std::runtime_error if the token is not of the given type
+ TokenType nextTokenType(); // type of the next token, without consuming it
+ int readFromTokens(std::vector<Token> tokens, int pos); // parses one object starting at pos; returns the index just past its closing brace
+
// Leaf data fields
std::shared_ptr<void> data;
std::string (*dataSerializer)(std::shared_ptr<void>);
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
new file mode 100644
index 0000000..ac7c215
--- /dev/null
+++ b/src/tokenizer.cpp
@@ -0,0 +1,95 @@
+#include "tokenizer.h"
+#include <stdexcept>
+
+namespace TehJSON
+{
+ /// Returns the enumerator name of `t` as a string, for use in diagnostics.
+ /// A value that does not correspond to any enumerator yields "Unknown".
+ std::string getTokenName(TokenType t)
+ {
+     // No default label on purpose: the compiler can then warn when a newly
+     // added enumerator is missing from this list.
+     switch(t)
+     {
+     case TokenType::LBrace: return "LBrace";
+     case TokenType::RBrace: return "RBrace";
+     case TokenType::Colon: return "Colon";
+     case TokenType::Comma: return "Comma";
+     case TokenType::StringLit: return "StringLit";
+     case TokenType::IntLit: return "IntLit";
+     case TokenType::FloatLit: return "FloatLit";
+     }
+     // Only reached for a value outside the enumerators. Without this return,
+     // flowing off the end of a non-void function is undefined behaviour.
+     return "Unknown";
+ }
+
+ /// Adds `s` to the end of the text that tokenize() will process.
+ void Tokenizer::appendInput(std::string s)
+ {
+     input.append(s);
+ }
+
+ /// Splits the accumulated input into JSON tokens.
+ ///
+ /// Recognised tokens: `{`, `}`, `:`, `,`, double-quoted string literals (escape
+ /// sequences are not interpreted) and decimal number literals with an optional
+ /// leading `-` and at most one `.`. Spaces, tabs, carriage returns and newlines
+ /// between tokens are skipped.
+ ///
+ /// @return the tokens in input order.
+ /// @throws std::out_of_range if a string literal is never closed or a number
+ ///         literal runs up to the end of the input.
+ /// @throws std::runtime_error on an unexpected character, a `-` that is not
+ ///         followed by a digit, or a number with more than one decimal point.
+ std::vector<Token> Tokenizer::tokenize()
+ {
+     const auto isDigit = [](char ch) { return ch >= '0' && ch <= '9'; };
+
+     std::vector<Token> tokens;
+
+     // Unsigned index, so comparisons against input.size() are not mixed-sign.
+     std::string::size_type pos = 0;
+     while(pos < input.size())
+     {
+         const char c = input[pos];
+         switch(c)
+         {
+         case '\t':
+         case '\n':
+         case '\r': // carriage return, so CRLF line endings are accepted
+         case ' ': break;
+         case '"':
+         {
+             // The literal runs up to the next quote character.
+             const std::string::size_type closing = input.find('"', pos + 1);
+             if(closing == std::string::npos)
+                 throw std::out_of_range("String literal never ends!");
+             tokens.push_back({TokenType::StringLit, input.substr(pos + 1, closing - pos - 1)});
+             pos = closing; // the shared pos++ below steps past the closing quote
+             break;
+         }
+         case '-':
+         case '0': case '1': case '2': case '3': case '4':
+         case '5': case '6': case '7': case '8': case '9':
+         {
+             // A sign is only valid directly in front of a digit.
+             if(c == '-' && (pos + 1 >= input.size() || !isDigit(input[pos + 1])))
+                 throw std::runtime_error("Unexpected character: '" + std::string(1, c) + "'");
+
+             std::string literalContent(1, c);
+             bool isInt = true;
+             // Look one character ahead, so pos always rests on the last
+             // character of the literal and the shared pos++ below steps past it.
+             while(true)
+             {
+                 if(pos + 1 >= input.size())
+                     throw std::out_of_range("Number literal never ends!");
+                 const char next = input[pos + 1];
+                 if(next == '.')
+                 {
+                     if(!isInt)
+                         throw std::runtime_error("Cannot have multiple decimal places in float");
+                     isInt = false;
+                 }
+                 else if(!isDigit(next))
+                     break;
+                 literalContent += next;
+                 ++pos;
+             }
+             tokens.push_back({isInt ? TokenType::IntLit : TokenType::FloatLit, literalContent});
+             break;
+         }
+         case '{': tokens.push_back({TokenType::LBrace, std::string(1, c)}); break;
+         case '}': tokens.push_back({TokenType::RBrace, std::string(1, c)}); break;
+         case ':': tokens.push_back({TokenType::Colon, std::string(1, c)}); break;
+         case ',': tokens.push_back({TokenType::Comma, std::string(1, c)}); break;
+         default: throw std::runtime_error("Unexpected character: '" + std::string(1, c) + "'");
+         }
+         ++pos;
+     }
+
+     return tokens;
+ }
+
+ /// Returns a copy of all text appended so far.
+ std::string Tokenizer::getInput()
+ {
+     return std::string(input);
+ }
+}
diff --git a/src/tokenizer.h b/src/tokenizer.h
new file mode 100644
index 0000000..3322553
--- /dev/null
+++ b/src/tokenizer.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace TehJSON
+{
+ enum struct TokenType // kinds of token produced by Tokenizer::tokenize()
+ {
+ LBrace, // '{'
+ RBrace, // '}'
+ Colon, // ':'
+ Comma, // ','
+ StringLit, // text enclosed in double quotes
+ IntLit, // number literal without a '.'
+ FloatLit, // number literal containing a '.'
+ };
+
+ std::string getTokenName(TokenType t); // enumerator name of t as a string, used in error messages
+
+ struct Token // one lexical token: its kind plus the text it was built from
+ {
+ TokenType type;
+ std::string content; // token text; for StringLit the surrounding quotes are not included
+ };
+
+ class Tokenizer // accumulates input text and splits it into JSON tokens
+ {
+ public:
+ void appendInput(std::string s); // appends s to the text accumulated so far
+ std::vector<Token> tokenize(); // tokenizes the whole accumulated input, starting from its first character
+ std::string getInput(); // returns a copy of the accumulated input
+ private:
+ std::string input = ""; // all text passed to appendInput so far
+ };
+}