-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
228 lines (202 loc) · 5.15 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
package astjson
import (
"strconv"
)
// Parse converts raw JSON bytes into an AST value by running a freshly
// constructed Parser over them. The result is nil when the first
// non-whitespace token is tkEOF; invalid input makes the parser panic.
// todo: fix me: return error instead of panic
func Parse(bs []byte) *Value {
	parser := NewParser(bs)
	return parser.Parse()
}
// Parser helps to parse the bytes to AST value.
// It pairs the raw input with a lexer built over the same bytes (see NewParser).
type Parser struct {
// bs is the raw JSON input; literal values are sliced out of it by token position.
bs []byte
// l is the lexer that produces the token stream consumed by the parser.
l *lexer
}
// Parse resets the lexer and parses one value starting at the first
// non-whitespace token. It returns nil when that token is tkEOF and panics on
// invalid input. Tokens following the first complete value are not examined.
// todo: deprecated it because we want to return error instead of panic
func (p *Parser) Parse() *Value {
	p.l.Reset()
	return p.parse(p.nextExceptWhitespace())
}
// parse builds the value introduced by tk: a nested object, a nested array or
// a literal. tkEOF yields nil; every other token type panics.
func (p *Parser) parse(tk token) *Value {
	switch tp := tk.tp; tp {
	case tkObjectStart:
		return p.objectParser()
	case tkArrayStart:
		return p.arrayParser()
	case tkEOF:
		return nil
	case tkNull, tkBool, tkString, tkNumber:
		return literal(p.bs, tk)
	}
	panic("invalid json syntax")
}
// verifyNextType reports whether a node of type ntp may be appended to the
// array: either the array is still empty, or ntp equals the type of the
// current last element. Checking only the tail is sufficient as long as every
// append is preceded by this check.
func (a *ArrayAst) verifyNextType(ntp NodeType) bool {
	count := len(a.Values)
	return count == 0 || a.Values[count-1].NodeType == ntp
}
// arrayParser parses the remainder of an array after tkArrayStart has already
// been consumed and returns it as an Array value.
//
// It panics when the input ends before the array is closed, when an element
// has a different node type than the previous one, when an element is followed
// by anything other than a comma or tkArrayEnd, or when a comma is not
// followed by another element (trailing commas are rejected).
func (p *Parser) arrayParser() *Value {
	var ar ArrayAst

	tk := p.nextExceptWhitespace()
	// "[]" is the only place where tkArrayEnd may stand in element position.
	// Checking it once, before the loop, keeps "[1,]" from being mistaken for
	// an empty array and silently dropping the parsed elements.
	if tk.tp == tkArrayEnd {
		return &Value{
			NodeType: Array,
			AstValue: &ar,
		}
	}

	for {
		val := p.parse(tk)
		// parse returns nil on tkEOF; fail explicitly instead of dereferencing it.
		if val == nil {
			panic("unexpected end of json input inside array")
		}
		if !ar.verifyNextType(val.NodeType) {
			panic("inconsistent array value type")
		}
		ar.Values = append(ar.Values, *val)

		// check whether the array ends or another element follows
		switch then := p.nextExceptWhitespace(); then.tp {
		case tkArrayEnd:
			return &Value{
				NodeType: Array,
				AstValue: &ar,
			}
		case tkComma:
			// A comma must be followed by an element; a stray tkArrayEnd here
			// is rejected by parse on the next iteration.
			tk = p.nextExceptWhitespace()
		default:
			// message kept as-is for compatibility with existing callers
			panic("invalid token after colon")
		}
	}
}
// objectParser parses the remainder of an object after tkObjectStart has
// already been consumed and returns it as an Object value whose KvMap holds
// every key/value pair.
//
// It panics when a key is not a string, when the key is not followed by a
// colon, when a key occurs twice, when the input ends before the value, when a
// value is followed by anything other than a comma or tkObjectEnd, or when a
// comma is not followed by another key (trailing commas are rejected).
func (p *Parser) objectParser() *Value {
	var v ObjectAst
	v.KvMap = map[string]Value{}

	start := p.nextExceptWhitespace()
	// "{}" is the only place where tkObjectEnd may stand in key position.
	// Checking it once, before the loop, rejects input such as {"a":1,}.
	if start.tp == tkObjectEnd {
		return &Value{
			NodeType: Object,
			AstValue: &v,
		}
	}

	for {
		if start.tp != tkString {
			panic("Invalid json schema for key")
		}
		value := literal(p.bs, start)
		key := string(value.AstValue.(StringAst))
		if tkColon != p.nextExceptWhitespace().tp {
			panic("invalid json schema after key")
		}
		if _, ok := v.KvMap[key]; ok {
			panic("duplicated key")
		}
		val := p.parse(p.nextExceptWhitespace())
		// parse returns nil on tkEOF; fail explicitly instead of dereferencing it.
		if val == nil {
			panic("unexpected end of json input inside object")
		}
		v.KvMap[key] = *val

		// check whether the object ends or another pair follows
		switch then := p.nextExceptWhitespace(); then.tp {
		case tkObjectEnd:
			return &Value{
				NodeType: Object,
				AstValue: &v,
			}
		case tkComma:
			start = p.nextExceptWhitespace()
		default:
			panic("invalid token after colon")
		}
	}
}
// NewParser returns a Parser over bs, backed by a new lexer created from the
// same bytes.
func NewParser(bs []byte) *Parser {
	p := new(Parser)
	p.bs = bs
	p.l = newLexer(bs)
	return p
}
// next scans tokens from the lexer and returns the first one whose type is
// not listed in skips. With no skips given it returns the very next token.
func (p *Parser) next(skips ...Type) token {
scan:
	for {
		tk := p.l.Scan()
		for _, skip := range skips {
			if tk.tp == skip {
				// skipped type: discard this token and scan again
				continue scan
			}
		}
		return tk
	}
}
// nextExceptWhitespace returns the next token, skipping over any tkWhiteSpace
// tokens that precede it.
func (p *Parser) nextExceptWhitespace() token {
	tk := p.next(tkWhiteSpace)
	return tk
}
// literal builds the AST value for a String, Bool, Number or Null token, using
// the token positions to slice the text out of bs. String, Bool and Number
// payloads are stored by value; Null stores a *NullAst. Any other token type
// yields a zero Value.
func literal(bs []byte, tk token) *Value {
	switch tk.tp {
	case tkNull:
		// the AstValue of this type carries no information
		return &Value{NodeType: Null, AstValue: &NullAst{}}
	case tkNumber:
		// todo: check whether use pointer
		return &Value{NodeType: Number, AstValue: tokenNumber(bs, tk)}
	case tkBool:
		// the conversion error is discarded; on failure the value is false
		parsed, _ := strconv.ParseBool(string(bs[tk.leftPos:tk.rightPos]))
		// todo: check whether use pointer
		return &Value{NodeType: Bool, AstValue: BoolAst(parsed)}
	case tkString:
		// drop the surrounding double quotes
		// todo: check whether use pointer
		inner := bs[tk.leftPos+1 : tk.rightPos-1]
		return &Value{NodeType: String, AstValue: StringAst(inner)}
	}
	return &Value{}
}
// tokenNumber converts a tkNumber token into a NumberAst holding the most
// precise representation: a float64 when the token is flagged isFloat, an
// int64 when it is flagged hasDash, and a uint64 otherwise.
//
// It panics if the token type isn't tkNumber, or if the token text cannot be
// converted (for example an integer that does not fit in 64 bits). Previously
// conversion errors were discarded, which silently produced clamped or zero
// values.
func tokenNumber(bs []byte, tk token) NumberAst {
	if tk.tp != tkNumber {
		panic("token must be a tkNumber token")
	}

	text := string(bs[tk.leftPos:tk.rightPos])
	var numberAst NumberAst
	switch {
	case tk.isFloat:
		f, err := strconv.ParseFloat(text, 64)
		if err != nil {
			panic("invalid json number: " + err.Error())
		}
		numberAst.Nt = floatNumber
		numberAst.f = f
	case tk.hasDash:
		i, err := strconv.ParseInt(text, 10, 64)
		if err != nil {
			panic("invalid json number: " + err.Error())
		}
		numberAst.Nt = integer
		numberAst.i = i
	default:
		u, err := strconv.ParseUint(text, 10, 64)
		if err != nil {
			panic("invalid json number: " + err.Error())
		}
		numberAst.Nt = unsignedInteger
		numberAst.u = u
	}
	return numberAst
}