summaryrefslogtreecommitdiff
path: root/src/tokenizer.cpp
diff options
context:
space:
mode:
authorDylan <boss@tehbox.org>2026-06-06 19:36:41 +1200
committerDylan <boss@tehbox.org>2026-06-06 19:36:41 +1200
commit1d379a5cf34475f66f2ab9359f77dac162c0a40e (patch)
tree395d54815331fbcad053001bf5cb28fafb69e9d4 /src/tokenizer.cpp
parent46c896bcd78d31130321562b0659e28230261b8e (diff)
downloadtehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.tar.gz
tehjson-1d379a5cf34475f66f2ab9359f77dac162c0a40e.zip
feat: JSON reading
- Implemented a tokenizer for json - Implemented a method which will read json from a string using the tokenizer
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r--src/tokenizer.cpp95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
new file mode 100644
index 0000000..ac7c215
--- /dev/null
+++ b/src/tokenizer.cpp
@@ -0,0 +1,95 @@
+#include "tokenizer.h"
+#include <stdexcept>
+
+namespace TehJSON
+{
+ /// Returns the enumerator name of a token type as text.
+ ///
+ /// @param t Token type to name.
+ /// @return The enumerator's identifier (for example "LBrace"), or "Unknown"
+ ///         when `t` holds a value that none of the cases below handle.
+ std::string getTokenName(TokenType t)
+ {
+     switch(t)
+     {
+         case TokenType::LBrace:    return "LBrace";
+         case TokenType::RBrace:    return "RBrace";
+         case TokenType::Colon:     return "Colon";
+         case TokenType::Comma:     return "Comma";
+         case TokenType::StringLit: return "StringLit";
+         case TokenType::IntLit:    return "IntLit";
+         case TokenType::FloatLit:  return "FloatLit";
+     }
+     // The switch deliberately has no `default:` label so the compiler can
+     // still warn about enumerators missing from it. This return keeps the
+     // function from running off its end (undefined behaviour) when `t` holds
+     // a value that is not listed above.
+     return "Unknown";
+ }
+
+ /// Adds more text to the end of the buffered input.
+ ///
+ /// @param s Text to append; the text already buffered is kept.
+ void Tokenizer::appendInput(std::string s)
+ {
+     this->input.append(s);
+ }
+
+ /// Splits the buffered input into a flat list of tokens.
+ ///
+ /// Recognised input:
+ ///  - tab, line feed, carriage return and space, which are skipped;
+ ///  - the punctuation characters `{`, `}`, `:` and `,`;
+ ///  - double-quoted string literals, taken verbatim up to the next `"`
+ ///    (escape sequences are not interpreted, so a backslash-escaped quote
+ ///    still ends the literal);
+ ///  - unsigned decimal numbers made of digits and at most one `.`; a number
+ ///    containing a `.` becomes a FloatLit, otherwise an IntLit.
+ ///
+ /// Scanning always starts from the beginning of the buffered input.
+ ///
+ /// @return The tokens in the order they appear in the input.
+ /// @throws std::out_of_range  if a string literal has no closing quote.
+ /// @throws std::runtime_error if a number contains a second `.`, or if a
+ ///         character that starts no known token is encountered.
+ std::vector<Token> Tokenizer::tokenize()
+ {
+     std::vector<Token> tokens;
+
+     // Unsigned index type, so comparisons with size() do not mix signedness.
+     const std::string::size_type length = input.size();
+     std::string::size_type pos = 0;
+
+     // Every case below is responsible for advancing `pos` past what it consumed.
+     while(pos < length)
+     {
+         const char c = input[pos];
+         switch(c)
+         {
+             case '\t':
+             case '\n':
+             case '\r': // carriage return is JSON whitespace too (CRLF input)
+             case ' ':
+                 pos++;
+                 break;
+
+             case '"':
+             {
+                 const std::string::size_type start = pos + 1;
+                 const std::string::size_type end = input.find('"', start);
+                 if(end == std::string::npos)
+                     throw std::out_of_range("String literal never ends!");
+                 tokens.push_back({TokenType::StringLit, input.substr(start, end - start)});
+                 pos = end + 1; // step over the closing quote
+                 break;
+             }
+
+             // Spelled out digit by digit: `case '0' ... '9'` is a compiler
+             // extension rather than standard C++.
+             case '0': case '1': case '2': case '3': case '4':
+             case '5': case '6': case '7': case '8': case '9':
+             {
+                 const std::string::size_type start = pos;
+                 bool isInt = true;
+                 // Reaching the end of the input simply ends the number, so a
+                 // number may be the last thing in the input.
+                 while(pos < length)
+                 {
+                     const char d = input[pos];
+                     if(d == '.')
+                     {
+                         if(!isInt)
+                             throw std::runtime_error("Cannot have multiple decimal places in float");
+                         isInt = false;
+                     }
+                     else if(d < '0' || d > '9')
+                     {
+                         break; // first character that is not part of the number
+                     }
+                     pos++;
+                 }
+                 tokens.push_back({isInt ? TokenType::IntLit : TokenType::FloatLit,
+                                   input.substr(start, pos - start)});
+                 break;
+             }
+
+             case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); pos++; break;
+             case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); pos++; break;
+             case ':': tokens.push_back({TokenType::Colon, std::string{c}}); pos++; break;
+             case ',': tokens.push_back({TokenType::Comma, std::string{c}}); pos++; break;
+
+             default:
+                 throw std::runtime_error("Unexpected character: '" + std::string{c} + "'");
+         }
+     }
+
+     return tokens;
+ }
+
+ /// Gives back a copy of everything buffered so far.
+ ///
+ /// @return The buffered input text.
+ std::string Tokenizer::getInput()
+ {
+     return this->input;
+ }
+}