From 1d379a5cf34475f66f2ab9359f77dac162c0a40e Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 6 Jun 2026 19:36:41 +1200 Subject: feat: JSON reading - Implemented a tokenizer for json - Implemented a method which will read json from a string using the tokenizer --- src/tokenizer.cpp | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/tokenizer.cpp (limited to 'src/tokenizer.cpp') diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..ac7c215 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,95 @@ +#include "tokenizer.h" +#include + +namespace TehJSON +{ + std::string getTokenName(TokenType t) + { + switch(t) + { + case TokenType::LBrace: return "LBrace"; + case TokenType::RBrace: return "RBrace"; + case TokenType::Colon: return "Colon"; + case TokenType::Comma: return "Comma"; + case TokenType::StringLit: return "StringLit"; + case TokenType::IntLit: return "IntLit"; + case TokenType::FloatLit: return "FloatLit"; + } + } + + void Tokenizer::appendInput(std::string s) + { + input += s; + } + + std::vector Tokenizer::tokenize() + { + std::vector tokens; + + int pos = 0; + while(pos < input.size()) + { + char c = input.at(pos); + switch(c) + { + case '\t': + case '\n': + case ' ': break; + case '"': + { + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + std::string literalContent = ""; + c = input.at(pos); + while(c != '"') + { + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + c = input.at(pos); + } + tokens.push_back({TokenType::StringLit, literalContent}); + break; + } + case '0' ... '9': { + std::string literalContent{c}; + pos++; + c = input.at(pos); + bool isInt = true; + while((c >= '0' && c <= '9') || (c == '.')) + { + if(c == '.') + { + if(!isInt) + throw std::runtime_error("Cannot have multiple decimal places in float"); + isInt = false; + } + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("Number literal never ends!"); + c = input.at(pos); + } + pos--; + tokens.push_back({isInt?TokenType::IntLit:TokenType::FloatLit, literalContent}); + break; + } + case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); break; + case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); break; + case ':': tokens.push_back({TokenType::Colon, std::string{c}}); break; + case ',': tokens.push_back({TokenType::Comma, std::string{c}}); break; + default: throw std::runtime_error("Unexpected character: '" + std::string{c} + "'"); + } + pos++; + } + + return tokens; + } + + std::string Tokenizer::getInput() + { + return input; + } +} -- cgit v1.2.3