blob: ac7c215af5e4ca38aab3ddcb7acf6949f5241df0 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
#include "tokenizer.h"
#include <stdexcept>
namespace TehJSON
{
/// Returns the enumerator name of a token type as text.
///
/// @param t  token type to describe
/// @return   the enumerator's spelling (e.g. "LBrace"), or "Unknown" when
///           @p t holds a value that none of the cases below handle
std::string getTokenName(TokenType t)
{
    // Deliberately no `default:` label, so the compiler can still warn when a
    // new enumerator is added without a matching case.
    switch(t)
    {
        case TokenType::LBrace:    return "LBrace";
        case TokenType::RBrace:    return "RBrace";
        case TokenType::Colon:     return "Colon";
        case TokenType::Comma:     return "Comma";
        case TokenType::StringLit: return "StringLit";
        case TokenType::IntLit:    return "IntLit";
        case TokenType::FloatLit:  return "FloatLit";
    }
    // Reaching the end of a non-void function without returning is undefined
    // behaviour, so give unmatched values a defined result.
    return "Unknown";
}
/// Appends more text to the end of the tokenizer's input buffer.
///
/// @param s  text to add after whatever has already been supplied
void Tokenizer::appendInput(std::string s)
{
    input.append(s);
}
/// Splits the whole input buffer into a flat list of tokens.
///
/// Recognised lexemes:
///  - `{`, `}`, `:` and `,` (one token each, content is the character itself);
///  - double-quoted string literals; the token content is the text between
///    the quotes. Escape sequences are not interpreted: the first `"` after
///    the opening quote closes the literal;
///  - unsigned decimal numbers made of digits and at most one `.`; a number
///    containing a `.` becomes a FloatLit, otherwise an IntLit.
/// Space, tab, line feed and carriage return between tokens are skipped.
/// The input buffer itself is not modified.
///
/// @return the tokens in the order they appear in the input
/// @throws std::out_of_range   if a string or number literal runs up to the
///                             end of the input without a terminating character
/// @throws std::runtime_error  if a number contains a second `.`, or if a
///                             character that starts no known lexeme is found
std::vector<Token> Tokenizer::tokenize()
{
    std::vector<Token> tokens;
    const std::string::size_type length = input.size();
    std::string::size_type pos = 0;

    while(pos < length)
    {
        char c = input[pos];
        switch(c)
        {
            // Insignificant whitespace between tokens.
            case '\t':
            case '\n':
            case '\r':
            case ' ':
                break;

            case '"':
            {
                std::string literalContent;
                ++pos; // step past the opening quote
                for(;;)
                {
                    if(pos >= length)
                        throw std::out_of_range("String literal never ends!");
                    c = input[pos];
                    if(c == '"')
                        break; // leaves pos on the closing quote
                    literalContent += c;
                    ++pos;
                }
                tokens.push_back({TokenType::StringLit, literalContent});
                break;
            }

            // Standard case labels instead of the GNU-only `'0' ... '9'` range.
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            {
                std::string literalContent(1, c);
                bool isInt = true;
                ++pos;
                for(;;)
                {
                    // A number must be followed by some other character; this
                    // check also covers a lone digit at the very end of input.
                    if(pos >= length)
                        throw std::out_of_range("Number literal never ends!");
                    c = input[pos];
                    if(c == '.')
                    {
                        if(!isInt)
                            throw std::runtime_error("Cannot have multiple decimal places in float");
                        isInt = false;
                    }
                    else if(c < '0' || c > '9')
                    {
                        break; // first character that is not part of the number
                    }
                    literalContent += c;
                    ++pos;
                }
                tokens.push_back({isInt ? TokenType::IntLit : TokenType::FloatLit, literalContent});
                // pos already sits on the character after the number, which
                // still has to be tokenized: skip the shared increment below.
                continue;
            }

            case '{': tokens.push_back({TokenType::LBrace, std::string(1, c)}); break;
            case '}': tokens.push_back({TokenType::RBrace, std::string(1, c)}); break;
            case ':': tokens.push_back({TokenType::Colon,  std::string(1, c)}); break;
            case ',': tokens.push_back({TokenType::Comma,  std::string(1, c)}); break;

            default:
                throw std::runtime_error("Unexpected character: '" + std::string(1, c) + "'");
        }
        ++pos;
    }
    return tokens;
}
/// Returns a copy of the text currently held in the input buffer.
///
/// @return the buffered input, by value
std::string Tokenizer::getInput()
{
    std::string snapshot(input);
    return snapshot;
}
}
|