]>
jfr.im git - irc/quakenet/qwebirc.git/blob - esimplejson/decoder.py
2 Implementation of JSONDecoder
7 from esimplejson
.scanner
import Scanner
, pattern
9 from esimplejson
._speedups
import scanstring
as c_scanstring
13 FLAGS
= re
.VERBOSE | re
.MULTILINE | re
.DOTALL
15 def _floatconstants():
18 _BYTES
= '7FF80000000000007FF0000000000000'.decode('hex')
19 if sys
.byteorder
!= 'big':
20 _BYTES
= _BYTES
[:8][::-1] + _BYTES
[8:][::-1]
21 nan
, inf
= struct
.unpack('dd', _BYTES
)
24 NaN
, PosInf
, NegInf
= _floatconstants()
27 def linecol(doc
, pos
):
28 lineno
= doc
.count('\n', 0, pos
) + 1
32 colno
= pos
- doc
.rindex('\n', 0, pos
)
36 def errmsg(msg
, doc
, pos
, end
=None):
37 lineno
, colno
= linecol(doc
, pos
)
39 return '%s: line %d column %d (char %d)' % (msg
, lineno
, colno
, pos
)
40 endlineno
, endcolno
= linecol(doc
, end
)
41 return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
42 msg
, lineno
, colno
, endlineno
, endcolno
, pos
, end
)
54 def JSONConstant(match
, context
, c
=_CONSTANTS
):
56 fn
= getattr(context
, 'parse_constant', None)
62 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant
)
65 def JSONNumber(match
, context
):
66 match
= JSONNumber
.regex
.match(match
.string
, *match
.span())
67 integer
, frac
, exp
= match
.groups()
69 fn
= getattr(context
, 'parse_float', None) or float
70 res
= fn(integer
+ (frac
or '') + (exp
or ''))
72 fn
= getattr(context
, 'parse_int', None) or int
75 pattern(r
'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber
)
78 STRINGCHUNK
= re
.compile(r
'(.*?)(["\\\x00-\x1f])', FLAGS
)
80 '"': u
'"', '\\': u
'\\', '/': u
'/',
81 'b': u
'\b', 'f': u
'\f', 'n': u
'\n', 'r': u
'\r', 't': u
'\t',
84 DEFAULT_ENCODING
= "utf-8"
86 def py_scanstring(s
, end
, encoding
=None, strict
=True, _b
=BACKSLASH
, _m
=STRINGCHUNK
.match
):
88 encoding
= DEFAULT_ENCODING
90 _append
= chunks
.append
96 errmsg("Unterminated string starting at", s
, begin
))
98 content
, terminator
= chunk
.groups()
100 if not isinstance(content
, unicode):
101 content
= unicode(content
, encoding
)
103 if terminator
== '"':
105 elif terminator
!= '\\':
107 raise ValueError(errmsg("Invalid control character %r at", s
, end
))
115 errmsg("Unterminated string starting at", s
, begin
))
121 errmsg("Invalid \\escape: %r" % (esc
,), s
, end
))
124 esc
= s
[end
+ 1:end
+ 5]
126 msg
= "Invalid \\uXXXX escape"
131 if 0xd800 <= uni
<= 0xdbff and sys
.maxunicode
> 65535:
132 msg
= "Invalid \\uXXXX\\uXXXX surrogate pair"
133 if not s
[end
+ 5:end
+ 7] == '\\u':
135 esc2
= s
[end
+ 7:end
+ 11]
139 uni
= 0x10000 + (((uni
- 0xd800) << 10) |
(uni2
- 0xdc00))
143 raise ValueError(errmsg(msg
, s
, end
))
146 return u
''.join(chunks
), end
151 scanstring
= c_scanstring
153 scanstring
= py_scanstring
155 def JSONString(match
, context
):
156 encoding
= getattr(context
, 'encoding', None)
157 strict
= getattr(context
, 'strict', True)
158 return scanstring(match
.string
, match
.end(), encoding
, strict
)
159 pattern(r
'"')(JSONString
)
162 WHITESPACE
= re
.compile(r
'\s*', FLAGS
)
164 def JSONObject(match
, context
, _w
=WHITESPACE
.match
):
167 end
= _w(s
, match
.end()).end()
168 nextchar
= s
[end
:end
+ 1]
169 # Trivial empty object
171 return pairs
, end
+ 1
173 raise ValueError(errmsg("Expecting property name", s
, end
))
175 encoding
= getattr(context
, 'encoding', None)
176 strict
= getattr(context
, 'strict', True)
177 iterscan
= JSONScanner
.iterscan
179 key
, end
= scanstring(s
, end
, encoding
, strict
)
180 end
= _w(s
, end
).end()
181 if s
[end
:end
+ 1] != ':':
182 raise ValueError(errmsg("Expecting : delimiter", s
, end
))
183 end
= _w(s
, end
+ 1).end()
185 value
, end
= iterscan(s
, idx
=end
, context
=context
).next()
186 except StopIteration:
187 raise ValueError(errmsg("Expecting object", s
, end
))
189 end
= _w(s
, end
).end()
190 nextchar
= s
[end
:end
+ 1]
195 raise ValueError(errmsg("Expecting , delimiter", s
, end
- 1))
196 end
= _w(s
, end
).end()
197 nextchar
= s
[end
:end
+ 1]
200 raise ValueError(errmsg("Expecting property name", s
, end
- 1))
201 object_hook
= getattr(context
, 'object_hook', None)
202 if object_hook
is not None:
203 pairs
= object_hook(pairs
)
205 pattern(r
'{')(JSONObject
)
208 def JSONArray(match
, context
, _w
=WHITESPACE
.match
):
211 end
= _w(s
, match
.end()).end()
212 # Look-ahead for trivial empty array
213 nextchar
= s
[end
:end
+ 1]
215 return values
, end
+ 1
216 iterscan
= JSONScanner
.iterscan
219 value
, end
= iterscan(s
, idx
=end
, context
=context
).next()
220 except StopIteration:
221 raise ValueError(errmsg("Expecting object", s
, end
))
223 end
= _w(s
, end
).end()
224 nextchar
= s
[end
:end
+ 1]
229 raise ValueError(errmsg("Expecting , delimiter", s
, end
))
230 end
= _w(s
, end
).end()
232 pattern(r
'\[')(JSONArray
)
243 JSONScanner
= Scanner(ANYTHING
)
246 class JSONDecoder(object):
248 Simple JSON <http://json.org> decoder
250 Performs the following translations in decoding by default:
252 +---------------+-------------------+
254 +===============+===================+
256 +---------------+-------------------+
258 +---------------+-------------------+
260 +---------------+-------------------+
261 | number (int) | int, long |
262 +---------------+-------------------+
263 | number (real) | float |
264 +---------------+-------------------+
266 +---------------+-------------------+
268 +---------------+-------------------+
270 +---------------+-------------------+
272 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
273 their corresponding ``float`` values, which is outside the JSON spec.
276 _scanner
= Scanner(ANYTHING
)
277 __all__
= ['__init__', 'decode', 'raw_decode']
279 def __init__(self
, encoding
=None, object_hook
=None, parse_float
=None,
280 parse_int
=None, parse_constant
=None, strict
=True):
282 ``encoding`` determines the encoding used to interpret any ``str``
283 objects decoded by this instance (utf-8 by default). It has no
284 effect when decoding ``unicode`` objects.
286 Note that currently only encodings that are a superset of ASCII work,
287 strings of other encodings should be passed in as ``unicode``.
289 ``object_hook``, if specified, will be called with the result
290 of every JSON object decoded and its return value will be used in
291 place of the given ``dict``. This can be used to provide custom
292 deserializations (e.g. to support JSON-RPC class hinting).
294 ``parse_float``, if specified, will be called with the string
295 of every JSON float to be decoded. By default this is equivalent to
296 float(num_str). This can be used to use another datatype or parser
297 for JSON floats (e.g. decimal.Decimal).
299 ``parse_int``, if specified, will be called with the string
300 of every JSON int to be decoded. By default this is equivalent to
301 int(num_str). This can be used to use another datatype or parser
302 for JSON integers (e.g. float).
304 ``parse_constant``, if specified, will be called with one of the
305 following strings: -Infinity, Infinity, NaN, null, true, false.
306 This can be used to raise an exception if invalid JSON numbers
309 self
.encoding
= encoding
310 self
.object_hook
= object_hook
311 self
.parse_float
= parse_float
312 self
.parse_int
= parse_int
313 self
.parse_constant
= parse_constant
316 def decode(self
, s
, _w
=WHITESPACE
.match
):
318 Return the Python representation of ``s`` (a ``str`` or ``unicode``
319 instance containing a JSON document)
321 obj
, end
= self
.raw_decode(s
, idx
=_w(s
, 0).end())
322 end
= _w(s
, end
).end()
324 raise ValueError(errmsg("Extra data", s
, end
, len(s
)))
327 def raw_decode(self
, s
, **kw
):
329 Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
330 with a JSON document) and return a 2-tuple of the Python
331 representation and the index in ``s`` where the document ended.
333 This can be used to decode a JSON document from a string that may
334 have extraneous data at the end.
336 kw
.setdefault('context', self
)
338 obj
, end
= self
._scanner
.iterscan(s
, **kw
).next()
339 except StopIteration:
340 raise ValueError("No JSON object could be decoded")
343 __all__
= ['JSONDecoder']