]> jfr.im git - irc/quakenet/qwebirc.git/blob - esimplejson/decoder.py
more names into about
[irc/quakenet/qwebirc.git] / esimplejson / decoder.py
1 """
2 Implementation of JSONDecoder
3 """
4 import re
5 import sys
6
7 from esimplejson.scanner import Scanner, pattern
8 try:
9 from esimplejson._speedups import scanstring as c_scanstring
10 except ImportError:
11 pass
12
# Regex flags shared by the patterns compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
14
15 def _floatconstants():
16 import struct
17 import sys
18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19 if sys.byteorder != 'big':
20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21 nan, inf = struct.unpack('dd', _BYTES)
22 return nan, inf, -inf
23
24 NaN, PosInf, NegInf = _floatconstants()
25
26
def linecol(doc, pos):
    """
    Translate the character offset ``pos`` within ``doc`` into a
    ``(lineno, colno)`` pair.

    Both numbers are 1-based.  Previously the column was 0-based on the
    first line but 1-based on every following line, so the same kind of
    error was reported one column to the left when it occurred on line 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields the index of the newline ending the previous line, or
    # -1 when pos is on the first line; either way the distance from it is
    # the 1-based column.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
34
35
def errmsg(msg, doc, pos, end=None):
    """
    Format ``msg`` together with the location of ``pos`` in ``doc``.

    The line and column numbers come from ``linecol``.  When ``end`` is
    given, the message describes the range from ``pos`` to ``end``
    instead of a single position.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (msg, start_line, start_col, pos)
43
44
# Default Python value for each literal matched by JSONConstant.
_CONSTANTS = dict([
    ('-Infinity', NegInf),
    ('Infinity', PosInf),
    ('NaN', NaN),
    ('true', True),
    ('false', False),
    ('null', None),
])
53
def JSONConstant(match, context, c=_CONSTANTS):
    """
    Convert a matched constant literal into its Python value.

    If ``context`` has a non-None ``parse_constant`` attribute it is
    called with the matched text; otherwise the value is looked up in
    ``c``.  Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(literal), None
    return c[literal], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
63
64
def JSONNumber(match, context):
    """
    Convert a matched JSON number into a Python number.

    The text is re-matched with ``JSONNumber.regex`` to split it into
    integer, fraction and exponent parts.  A number with a fraction or
    exponent goes through ``context.parse_float`` (default ``float``);
    anything else goes through ``context.parse_int`` (default ``int``).
    Returns ``(number, None)``.
    """
    # NOTE(review): JSONNumber.regex is presumably attached by pattern()
    # below -- confirm against esimplejson.scanner.
    parts = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
76
77
# Lazily matches a run of ordinary characters followed by the first quote,
# backslash or control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes and the text each one stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed for byte strings when the caller does not name one.
DEFAULT_ENCODING = "utf-8"
85
_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')


def _parse_hex4(esc):
    """Return the value of exactly four hex digits, or raise ValueError."""
    # int(x, 16) alone also accepts forms such as '0x1f', ' 1ff' or '+1ff',
    # none of which are valid inside a \uXXXX escape.
    if len(esc) != 4 or not _HEX_DIGITS.issuperset(esc):
        raise ValueError
    return int(esc, 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Scan a JSON string literal inside ``s`` and return its decoded value.

    ``end`` is the index at which scanning starts; ``end - 1`` is reported
    as the start of the string in "unterminated" errors.  ``encoding`` is
    used to decode chunks that are not already ``unicode`` (defaults to
    ``DEFAULT_ENCODING``).  When ``strict`` is true, a raw control
    character inside the string is an error; otherwise it is kept as-is.

    Returns ``(text, end)`` where ``text`` is a ``unicode`` object and
    ``end`` is the index just past the closing quote.

    Raises ``ValueError`` for an unterminated string, an invalid control
    character (strict mode), an unknown backslash escape, a malformed
    ``\\uXXXX`` escape, or an invalid surrogate pair.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Plain text before the terminator, decoded if it is a byte string.
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            # The terminator is a raw control character.
            if strict:
                # Interpolate the character; the template was previously
                # passed through with a literal "%r" in the message.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        # From here on the terminator was a backslash: decode the escape.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                uni = _parse_hex4(s[end + 1:end + 5])
                # On wide builds a high surrogate must be combined with
                # the low surrogate that follows it.
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if s[end + 5:end + 7] != '\\u':
                        raise ValueError
                    uni2 = _parse_hex4(s[end + 7:end + 11])
                    # Without this range check any second escape would be
                    # folded in and yield an unrelated code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
147
148
# Use the C speedup when the optional import at the top of the module
# succeeded; otherwise fall back to the pure-Python scanner.
scanstring = globals().get('c_scanstring', py_scanstring)
154
def JSONString(match, context):
    """
    Decode the string literal whose opening quote was just matched.

    ``encoding`` and ``strict`` are read from ``context`` (defaulting to
    ``None`` and ``True``) and passed to ``scanstring``, whose result is
    returned unchanged.
    """
    return scanstring(
        match.string,
        match.end(),
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True),
    )
pattern(r'"')(JSONString)
160
161
# Matches a (possibly empty) run of whitespace; used to skip between tokens.
WHITESPACE = re.compile(r'\s*', flags=FLAGS)
163
def JSONObject(match, context, _w=WHITESPACE.match):
    """
    Decode a JSON object whose opening brace was just matched.

    Keys are scanned with ``scanstring`` and values with
    ``JSONScanner.iterscan``.  ``encoding``, ``strict`` and
    ``object_hook`` are read from ``context``.  If ``object_hook`` is not
    None it is applied to the resulting dict, including the empty one.

    Returns ``(pairs, end)`` where ``end`` is the index just past the
    closing brace.  Raises ``ValueError`` when a property name, a ``:``
    or ``,`` delimiter, or a value is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object
    if nextchar == '}':
        # The hook is documented as seeing every decoded object, so the
        # fast path must not bypass it.
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # end was already advanced past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
206
207
def JSONArray(match, context, _w=WHITESPACE.match):
    """
    Decode a JSON array whose opening bracket was just matched.

    Each element is scanned with ``JSONScanner.iterscan``.  Returns
    ``(values, end)`` where ``values`` is a list and ``end`` is the index
    just past the closing bracket.  Raises ``ValueError`` when an element
    or a ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # end already points past the offending character; report the
            # character itself, as JSONObject does.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
233
234
# Every value handler defined above, in the original order.
# NOTE(review): the order is presumably significant to Scanner -- confirm
# before rearranging.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner used by JSONObject and JSONArray for nested values.
JSONScanner = Scanner(ANYTHING)
244
245
class JSONDecoder(object):
    """
    Decoder that turns JSON <http://json.org> text into Python objects.

    Unless customised, values are converted as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are accepted as well and map
    to the matching ``float`` values, although they are not part of the
    JSON specification.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when left as None).  It is irrelevant for ``unicode``
        input.  Only ASCII-superset encodings currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, receives the result of each decoded
        JSON object, and its return value replaces the ``dict``.  This
        allows custom deserialisation (e.g. JSON-RPC class hinting).

        ``parse_float``, when given, is called with the text of each JSON
        float; the default behaves like ``float(num_str)``.  Use it to
        substitute another type or parser (e.g. ``decimal.Decimal``).

        ``parse_int``, when given, is called with the text of each JSON
        integer; the default behaves like ``int(num_str)``.  Use it to
        substitute another type or parser (e.g. ``float``).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is stored on the instance and read by the string
        handlers, which pass it to the string scanner.
        """
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_constant = parse_constant
        self.parse_int = parse_int
        self.parse_float = parse_float

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` holding exactly one JSON
        document) and return the resulting Python object.

        Leading and trailing whitespace is skipped; anything else after
        the document raises ``ValueError``.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document at the start of ``s`` (a ``str`` or
        ``unicode``) and return a 2-tuple of the Python object and the
        index in ``s`` at which the document ended.

        Unlike ``decode``, trailing data after the document is allowed.
        Raises ``ValueError`` if the scanner yields nothing.
        """
        kw.setdefault('context', self)
        try:
            result = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = result
        return obj, end


__all__ = ['JSONDecoder']