]>
Commit | Line | Data |
---|---|---|
1 | """ | |
2 | Implementation of JSONDecoder | |
3 | """ | |
4 | import re | |
5 | import sys | |
6 | ||
7 | from simplejson.scanner import Scanner, pattern | |
8 | try: | |
9 | from simplejson._speedups import scanstring as c_scanstring | |
10 | except ImportError: | |
11 | pass | |
12 | ||
# Flags shared by the regular expressions compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
14 | ||
def _floatconstants():
    """Build the IEEE 754 special float values from their raw bit patterns.

    Returns a ``(nan, inf, -inf)`` tuple of floats.
    """
    import binascii
    import struct
    import sys
    # Big-endian images of a quiet NaN followed by +Infinity, 8 bytes each.
    # binascii.unhexlify produces the same bytes as the Python 2-only
    # str.decode('hex') codec but is available on every Python version.
    _BYTES = binascii.unhexlify('7FF80000000000007FF0000000000000')
    if sys.byteorder != 'big':
        # The 'dd' format below reads doubles in host byte order, so on
        # little-endian machines reverse each 8-byte half separately.
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
25 | ||
26 | ||
def linecol(doc, pos):
    """Translate the character offset ``pos`` in ``doc`` to (line, column).

    Both numbers are 1-based: the first character of every line, including
    the first line, is reported as column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No newline precedes pos, so pos itself is the 0-based column.
        # Add one so line 1 uses the same 1-based columns as later lines
        # (where pos minus the newline's index is already 1-based).
        colno = pos + 1
    else:
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno
34 | ||
35 | ||
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location it refers to in ``doc``.

    ``pos`` is the character offset of the problem.  When ``end`` is given
    the message describes the range ``pos``..``end`` instead of a single
    position.  Line and column numbers come from ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
43 | ||
44 | ||
# Python value for each literal that JSONConstant can match.
_CONSTANTS = {
    # Standard JSON literals.
    'null': None,
    'true': True,
    'false': False,
    # Floating point specials, accepted as an extension to JSON.
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
53 | ||
def JSONConstant(match, context, c=_CONSTANTS):
    """Convert a matched constant literal into its Python value.

    If ``context`` has a non-None ``parse_constant`` attribute, it is called
    with the literal text and its result is used; otherwise the literal is
    looked up in ``c``.  Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(literal), None
    return c[literal], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
63 | ||
64 | ||
def JSONNumber(match, context):
    """Convert a matched JSON number into a Python number.

    Numbers with a fraction or exponent go through ``context.parse_float``
    (default ``float``); all others go through ``context.parse_int``
    (default ``int``).  Returns ``(value, None)``.
    """
    # Re-match over the same span with the function's own regex so the
    # integer, fraction and exponent parts are available as groups.
    # NOTE(review): ``JSONNumber.regex`` is presumably attached by the
    # ``pattern`` registration below - confirm against simplejson.scanner.
    parts = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        convert = getattr(context, 'parse_int', None) or int
        return convert(integer), None
    convert = getattr(context, 'parse_float', None) or float
    return convert(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
76 | ||
77 | ||
# Lazily matches a run of ordinary characters (group 1) up to the next
# double quote, backslash or control character (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character after a backslash mapped to the
# character it stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding py_scanstring falls back to when none is supplied.
DEFAULT_ENCODING = "utf-8"
85 | ||
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the JSON string literal in ``s`` whose body starts at ``end``.

    ``end`` is the index just past the opening double quote.  ``encoding``
    is used to decode non-unicode chunks (``DEFAULT_ENCODING`` when None).
    With ``strict`` true, a raw control character inside the string is an
    error; otherwise it is kept as part of the result.

    Returns ``(text, end)`` where ``text`` is the decoded unicode string and
    ``end`` is the index just past the closing double quote.

    Raises ValueError for an unterminated string, an invalid control
    character (strict mode), an unknown backslash escape, or a malformed
    ``\\uXXXX`` escape or surrogate pair.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting an unterminated string.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            # The chunk ended on a raw control character.
            if strict:
                # Interpolate the offending character; without this the
                # literal "%r" placeholder ends up in the error message.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                # A high surrogate must be followed by a second \uXXXX
                # escape; the two are combined into one code point.  Only
                # done when the interpreter can represent such code points.
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # The second half has to be a low surrogate, otherwise
                    # the arithmetic below yields a meaningless code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
147 | ||
148 | ||
# Use the C speedup when its import at the top of the module succeeded,
# otherwise fall back to the pure-Python implementation.
if 'c_scanstring' in globals():
    scanstring = c_scanstring
else:
    scanstring = py_scanstring
154 | ||
def JSONString(match, context):
    """Parse the JSON string whose opening quote has just been matched.

    Reads the optional ``encoding`` and ``strict`` attributes from
    ``context`` and hands the rest of the work to ``scanstring``, whose
    result is returned unchanged.
    """
    text_encoding = getattr(context, 'encoding', None)
    strict_mode = getattr(context, 'strict', True)
    body_start = match.end()
    return scanstring(match.string, body_start, text_encoding, strict_mode)
pattern(r'"')(JSONString)
160 | ||
161 | ||
# Matches a (possibly empty) run of whitespace; its ``match`` method is used
# throughout this module to skip over whitespace between tokens.
WHITESPACE = re.compile(r'\s*', flags=FLAGS)
163 | ||
def JSONObject(match, context, _w=WHITESPACE.match):
    """Parse a JSON object whose opening brace has just been matched.

    ``match`` is the regex match for ``{``.  ``context`` supplies the
    optional ``encoding``, ``strict`` and ``object_hook`` attributes.

    Returns ``(value, end)`` where ``value`` is the decoded dict, or the
    result of ``object_hook`` applied to it, and ``end`` is the index just
    past the closing brace.  Raises ValueError on malformed input.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object
    if nextchar == '}':
        # The hook must see every decoded object, the empty one included.
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        # ``end`` points just past the opening quote of the property name.
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` was already advanced, so report the character itself.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
206 | ||
207 | ||
def JSONArray(match, context, _w=WHITESPACE.match):
    """Parse a JSON array whose opening bracket has just been matched.

    ``match`` is the regex match for ``[``.  ``context`` is passed through
    to the scanner used for the array's elements.

    Returns ``(values, end)`` where ``values`` is the decoded list and
    ``end`` is the index just past the closing bracket.  Raises ValueError
    on malformed input.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # step back one to report its real position (as JSONObject does).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
233 | ||
234 | ||
# Every value parser defined in this module, in the order they are handed
# to the Scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner that JSONObject and JSONArray use for nested values.
JSONScanner = Scanner(ANYTHING)
244 | ||
245 | ||
class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python as follows:

    - object        -> dict
    - array         -> list
    - string        -> unicode
    - number (int)  -> int, long
    - number (real) -> float
    - true          -> True
    - false         -> False
    - null          -> None

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded to
    the corresponding ``float`` values, although they are not part of the
    JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        ``encoding`` is the encoding used to interpret ``str`` input decoded
        by this instance (utf-8 by default).  It has no effect on
        ``unicode`` input.  Only encodings that are a superset of ASCII
        currently work; text in any other encoding should be passed in as
        ``unicode``.

        ``object_hook``, if given, is called with the result of every JSON
        object decoded, and its return value is used in place of the
        ``dict``.  This allows custom deserializations (e.g. JSON-RPC class
        hinting).

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to float(num_str).
        Use it to plug in another datatype or parser (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON int
        to be decoded.  The default is equivalent to int(num_str).  Use it
        to plug in another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is handed to the string scanner.  When true (the
        default), a raw control character inside a JSON string is rejected.
        """
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` instance holding exactly one
        JSON document, optionally surrounded by whitespace) and return its
        Python representation.  Raises ValueError if anything other than
        whitespace follows the document.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        # Only whitespace may follow the document.
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document that ``s`` (a ``str`` or ``unicode``)
        begins with.  Returns a 2-tuple of the Python representation and the
        index in ``s`` at which the document ended.

        Unlike ``decode``, extraneous data after the document is not an
        error.  Keyword arguments are forwarded to the scanner; ``context``
        defaults to this decoder.
        """
        if 'context' not in kw:
            kw['context'] = self
        try:
            obj, end = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end
342 | ||
343 | __all__ = ['JSONDecoder'] |