summary refs log tree commit diff
path: root/src/tokenizer.cpp
blob: ac7c215af5e4ca38aab3ddcb7acf6949f5241df0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include "tokenizer.h"
#include <stdexcept>

namespace TehJSON
{
	/// Returns the textual name of a token type.
	///
	/// @param t Token type to describe.
	/// @return The enumerator's name (for example "LBrace"), or "Unknown"
	///         when t holds a value that none of the cases below match.
	std::string getTokenName(TokenType t)
	{
		// Deliberately no `default:` label, so compilers can still warn when
		// an enumerator is left unhandled in this switch.
		switch(t)
		{
		case TokenType::LBrace: return "LBrace";
		case TokenType::RBrace: return "RBrace";
		case TokenType::Colon: return "Colon";
		case TokenType::Comma: return "Comma";
		case TokenType::StringLit: return "StringLit";
		case TokenType::IntLit: return "IntLit";
		case TokenType::FloatLit: return "FloatLit";
		}

		// Reached only when t matches no case above. Without this return,
		// control would flow off the end of a non-void function, which is
		// undefined behaviour.
		return "Unknown";
	}
	
	/// Adds more text to the end of the buffered input.
	///
	/// @param s Text to append; the existing buffer contents are kept and
	///          s is placed after them.
	void Tokenizer::appendInput(std::string s)
	{
		input.append(s);
	}

	/// Splits the buffered input into a flat list of tokens.
	///
	/// Scanning always starts at the beginning of the buffer, so every call
	/// tokenizes the whole input again.
	///
	/// Recognised input:
	///  - tab, newline, carriage return and space are skipped;
	///  - `{`, `}`, `:` and `,` become LBrace, RBrace, Colon and Comma;
	///  - `"..."` becomes a StringLit holding the characters between the
	///    quotes. Escape sequences are not interpreted: a backslash is copied
	///    verbatim and the next `"` always ends the literal;
	///  - a run starting with a digit and containing digits and at most one
	///    `.` becomes an IntLit (no `.`) or a FloatLit (one `.`).
	///
	/// @return The tokens in the order they appear in the input.
	/// @throws std::out_of_range  if the input ends inside a string literal,
	///         or ends before a character that terminates a number literal.
	/// @throws std::runtime_error if a number contains a second `.`, or a
	///         character that starts no known token is encountered.
	std::vector<Token> Tokenizer::tokenize()
	{
		std::vector<Token> tokens;

		// Unsigned index type matching input.size(); avoids the signed/unsigned
		// comparison an int would cause.
		std::string::size_type pos = 0;
		while(pos < input.size())
		{
			char c = input[pos];
			switch(c)
			{
			// Whitespace between tokens is ignored. '\r' is included so that
			// input with CRLF line endings is accepted.
			case '\t':
			case '\n':
			case '\r':
			case ' ': break;
			case '"':
			{
				std::string literalContent;
				pos++; // step past the opening quote
				while(true)
				{
					if(pos >= input.size())
						throw std::out_of_range("String literal never ends!");
					c = input[pos];
					if(c == '"')
						break; // leaves this loop only; pos stays on the closing quote
					literalContent += c;
					pos++;
				}
				tokens.push_back({TokenType::StringLit, literalContent});
				break;
			}
			// Explicit labels instead of the GNU-only `case '0' ... '9'` range,
			// so the file compiles with any standard C++ compiler.
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
			{
				std::string literalContent(1, c);
				bool isInt = true;
				pos++;
				while(true)
				{
					// A number must be followed by at least one more character.
					// Reaching the end of the input here is reported the same
					// way whether it happens right after the first digit or
					// later in the literal.
					if(pos >= input.size())
						throw std::out_of_range("Number literal never ends!");
					c = input[pos];
					const bool isDigit = (c >= '0' && c <= '9');
					if(!isDigit && c != '.')
						break; // first character that is not part of the number
					if(c == '.')
					{
						if(!isInt)
							throw std::runtime_error("Cannot have multiple decimal places in float");
						isInt = false;
					}
					literalContent += c;
					pos++;
				}
				// pos is on the character after the number; step back so the
				// shared pos++ below lands on it and it is tokenized next.
				pos--;
				tokens.push_back({isInt?TokenType::IntLit:TokenType::FloatLit, literalContent});
				break;
			}
			case '{': tokens.push_back({TokenType::LBrace, std::string{c}}); break;
			case '}': tokens.push_back({TokenType::RBrace, std::string{c}}); break;
			case ':': tokens.push_back({TokenType::Colon, std::string{c}}); break;
			case ',': tokens.push_back({TokenType::Comma, std::string{c}}); break;
			default: throw std::runtime_error("Unexpected character: '" + std::string{c} + "'");
			}
			pos++;
		}

		return tokens;
	}

	std::string Tokenizer::getInput()
	{
		return input;
	}
}