feat: JSON reading

- Implemented a tokenizer for JSON
- Implemented a method that reads JSON from a string using the tokenizer
author: Dylan <boss@tehbox.org> 2026-06-06 19:36:41 +1200
committer: Dylan <boss@tehbox.org> 2026-06-06 19:36:41 +1200
commit: 1d379a5cf34475f66f2ab9359f77dac162c0a40e (patch)
tree: 395d54815331fbcad053001bf5cb28fafb69e9d4 /src/tokenizer.cpp
parent: 46c896bcd78d31130321562b0659e28230261b8e (diff)
download: tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.tar.gz
tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.zip
1 files changed, 95 insertions, 0 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
new file mode 100644
index 0000000..ac7c215
--- /dev/null
+++ b/src/tokenizer.cpp
@@ -0,0 +1,95 @@
+#include "tokenizer.h"
+#include <stdexcept>
+
+namespace TehJSON
+{
+	/// Returns the enumerator name of a token type as text.
+	///
+	/// @param t Token type to name.
+	/// @return The enumerator's identifier (for example "LBrace"), or "Unknown"
+	///         when t holds a value that none of the cases below handle.
+	std::string getTokenName(TokenType t)
+	{
+		// No default label on purpose: the compiler can then still warn when a
+		// newly added enumerator is missing from this switch.
+		switch(t)
+		{
+		case TokenType::LBrace: return "LBrace";
+		case TokenType::RBrace: return "RBrace";
+		case TokenType::Colon: return "Colon";
+		case TokenType::Comma: return "Comma";
+		case TokenType::StringLit: return "StringLit";
+		case TokenType::IntLit: return "IntLit";
+		case TokenType::FloatLit: return "FloatLit";
+		}
+		// Flowing off the end of a non-void function is undefined behaviour, so
+		// give unhandled values a well-defined result.
+		return "Unknown";
+	}
+	
+	/// Adds more text to the end of the tokenizer's stored input.
+	///
+	/// Text that was stored earlier is kept; s is placed directly after it.
+	///
+	/// @param s Text to append.
+	void Tokenizer::appendInput(std::string s)
+	{
+		this->input.append(s);
+	}
+
+	/// Splits the stored input into a flat list of tokens.
+	///
+	/// Scanning always starts at the beginning of the stored input. Spaces,
+	/// tabs, line feeds and carriage returns between tokens are skipped.
+	///
+	/// String tokens carry the text between the quotes exactly as written: a
+	/// backslash and the character after it are copied verbatim (escape
+	/// sequences are not decoded), so an escaped quote does not end the literal.
+	///
+	/// Number tokens are a run of digits containing at most one '.'. A leading
+	/// sign or an exponent is not recognised and is reported as an unexpected
+	/// character.
+	///
+	/// @return The tokens in the order they appear in the input.
+	/// @throws std::out_of_range  if the input ends inside a string literal or
+	///                            directly after a number literal.
+	/// @throws std::runtime_error if a number contains a second '.', or if a
+	///                            character that starts no token is found.
+	std::vector<Token> Tokenizer::tokenize()
+	{
+		std::vector<Token> tokens;
+
+		// Unsigned index so comparisons with input.size() do not mix signedness.
+		std::string::size_type pos = 0;
+		while(pos < input.size())
+		{
+			char c = input.at(pos);
+			switch(c)
+			{
+			case '\t':
+			case '\n':
+			case '\r': // carriage return, e.g. from CRLF line endings
+			case ' ': break;
+			case '"':
+			{
+				std::string literalContent;
+				for(;;)
+				{
+					pos++;
+					if(pos >= input.size())
+						throw std::out_of_range("String literal never ends!");
+					c = input.at(pos);
+					if(c == '"')
+						break;
+					literalContent += c;
+					if(c == '\\')
+					{
+						// Copy the escaped character without inspecting it, so
+						// that \" stays inside the literal instead of closing it.
+						pos++;
+						if(pos >= input.size())
+							throw std::out_of_range("String literal never ends!");
+						literalContent += input.at(pos);
+					}
+				}
+				// pos now rests on the closing quote; the shared pos++ skips it.
+				tokens.push_back({TokenType::StringLit, literalContent});
+				break;
+			}
+			// Spelled out because case ranges ('0' ... '9') are a compiler
+			// extension rather than standard C++.
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+			{
+				std::string literalContent(1, c);
+				bool isInt = true;
+				for(;;)
+				{
+					pos++;
+					// A number must be followed by at least one more character.
+					if(pos >= input.size())
+						throw std::out_of_range("Number literal never ends!");
+					c = input.at(pos);
+					if(c == '.')
+					{
+						if(!isInt)
+							throw std::runtime_error("Cannot have multiple decimal places in float");
+						isInt = false;
+					}
+					else if(c < '0' || c > '9')
+						break;
+					literalContent += c;
+				}
+				// Step back onto the last character of the number so the shared
+				// pos++ below lands on the character that ended it.
+				pos--;
+				tokens.push_back({isInt?TokenType::IntLit:TokenType::FloatLit, literalContent});
+				break;
+			}
+			case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); break;
+			case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); break;
+			case ':': tokens.push_back({TokenType::Colon, std::string{c}}); break;
+			case ',': tokens.push_back({TokenType::Comma, std::string{c}}); break;
+			default: throw std::runtime_error("Unexpected character: '" + std::string{c} + "'");
+			}
+			pos++;
+		}
+
+		return tokens;
+	}
+
+	/// Returns the tokenizer's stored input.
+	///
+	/// @return A copy of the stored input string.
+	std::string Tokenizer::getInput()
+	{
+		return this->input;
+	}
+}
author	Dylan <boss@tehbox.org>	2026-06-06 19:36:41 +1200
committer	Dylan <boss@tehbox.org>	2026-06-06 19:36:41 +1200
commit	1d379a5cf34475f66f2ab9359f77dac162c0a40e (patch)
tree	395d54815331fbcad053001bf5cb28fafb69e9d4 /src/tokenizer.cpp
parent	46c896bcd78d31130321562b0659e28230261b8e (diff)
download	tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.tar.gz tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.zip