diff options
| author | Dylan <boss@tehbox.org> | 2026-06-06 19:36:41 +1200 |
|---|---|---|
| committer | Dylan <boss@tehbox.org> | 2026-06-06 19:36:41 +1200 |
| commit | 1d379a5cf34475f66f2ab9359f77dac162c0a40e (patch) | |
| tree | 395d54815331fbcad053001bf5cb28fafb69e9d4 /src/tokenizer.cpp | |
| parent | 46c896bcd78d31130321562b0659e28230261b8e (diff) | |
| download | tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.tar.gz tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.zip | |
feat: JSON reading
- Implemented a tokenizer for json
- Implemented a method which will read json from a string using the tokenizer
Diffstat (limited to 'src/tokenizer.cpp')
| -rw-r--r-- | src/tokenizer.cpp | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..ac7c215 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,95 @@ +#include "tokenizer.h" +#include <stdexcept> + +namespace TehJSON +{ + std::string getTokenName(TokenType t) + { + switch(t) + { + case TokenType::LBrace: return "LBrace"; + case TokenType::RBrace: return "RBrace"; + case TokenType::Colon: return "Colon"; + case TokenType::Comma: return "Comma"; + case TokenType::StringLit: return "StringLit"; + case TokenType::IntLit: return "IntLit"; + case TokenType::FloatLit: return "FloatLit"; + } + } + + void Tokenizer::appendInput(std::string s) + { + input += s; + } + + std::vector<Token> Tokenizer::tokenize() + { + std::vector<Token> tokens; + + int pos = 0; + while(pos < input.size()) + { + char c = input.at(pos); + switch(c) + { + case '\t': + case '\n': + case ' ': break; + case '"': + { + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + std::string literalContent = ""; + c = input.at(pos); + while(c != '"') + { + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("String literal never ends!"); + c = input.at(pos); + } + tokens.push_back({TokenType::StringLit, literalContent}); + break; + } + case '0' ... '9': { + std::string literalContent{c}; + pos++; + c = input.at(pos); + bool isInt = true; + while((c >= '0' && c <= '9') || (c == '.')) + { + if(c == '.') + { + if(!isInt) + throw std::runtime_error("Cannot have multiple decimal places in float"); + isInt = false; + } + literalContent += std::string{c}; + pos++; + if(pos >= input.size()) + throw std::out_of_range("Number literal never ends!"); + c = input.at(pos); + } + pos--; + tokens.push_back({isInt?TokenType::IntLit:TokenType::FloatLit, literalContent}); + break; + } + case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); break; + case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); break; + case ':': tokens.push_back({TokenType::Colon, std::string{c}}); break; + case ',': tokens.push_back({TokenType::Comma, std::string{c}}); break; + default: throw std::runtime_error("Unexpected character: '" + std::string{c} + "'"); + } + pos++; + } + + return tokens; + } + + std::string Tokenizer::getInput() + { + return input; + } +} |
