]>
Commit | Line | Data |
---|---|---|
4d256d41 CP |
1 | """ |
2 | Implementation of JSONEncoder | |
3 | """ | |
4 | import re | |
5 | ||
6 | try: | |
becfa850 | 7 | from esimplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii |
4d256d41 CP |
8 | except ImportError: |
9 | pass | |
10 | ||
# Characters that must be escaped when non-ASCII output is permitted:
# the C0 control range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Backslash, double quote, or anything outside the printable ASCII range
# (space through tilde); used when the output must be pure ASCII.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Any character in the \x80-\xff range; py_encode_basestring_ascii uses a
# match as the cue to decode a byte string as UTF-8.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are
# listed explicitly; the loop below fills in the remaining control
# characters with their \uXXXX form.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # Never overwrite one of the short escapes defined above.
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Parsing an exponent this large is expected to overflow to infinity
# (probably not guaranteed on every machine).
INFINITY = float('1e66666')
# Formatter applied by floatstr to ordinary (finite) floats.
FLOAT_REPR = repr
29 | ||
def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Ordinary values are formatted with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``NaN``, ``Infinity`` and ``-Infinity``
    when ``allow_nan`` is true; otherwise ``ValueError`` is raised.
    """
    # The specials are detected with plain comparisons only, so the test
    # does not depend on the platform's float internals.
    if o != o:
        special = 'NaN'
    else:
        special = None
        for bound, name in ((INFINITY, 'Infinity'), (-INFINITY, '-Infinity')):
            if o == bound:
                special = name
                break
        if special is None:
            return FLOAT_REPR(o)

    if allow_nan:
        return special
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
48 | ||
49 | ||
def encode_basestring(s):
    """
    Return a JSON representation of a Python string.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.  Other
    characters, including non-ASCII ones, are passed through unchanged.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
57 | ||
58 | ||
def py_encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string.

    Characters matched by ``ESCAPE_ASCII`` are replaced by their short
    escape from ``ESCAPE_DCT`` when one exists, and by ``\\uXXXX``
    escapes otherwise.  The result is wrapped in double quotes.
    """
    # A byte string containing high-bit characters is decoded as UTF-8
    # before escaping.
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Beyond the BMP: split into a UTF-16 surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(escape, s)) + '"'
77 | ||
78 | ||
# Use the C implementation when the optional speedups import at the top of
# the module succeeded; otherwise fall back to the pure-Python encoder.
try:
    c_encode_basestring_ascii
except NameError:
    encode_basestring_ascii = py_encode_basestring_ascii
else:
    encode_basestring_ascii = c_encode_basestring_ascii
83 | ||
84 | ||
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float, bool or
        None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would end in a
        recursion-depth error).  Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # NOTE(review): the nesting depth lives on the instance and is
        # adjusted while the encoding generators run, so an encoding that
        # is abandoned part-way (or two encodings interleaved on the same
        # encoder) leaves this counter out of step when indent is set.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the default() method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the indentation for the current
        nesting depth.  Only called when ``self.indent`` is not None.
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON array text for ``lst`` chunk by chunk.

        ``markers`` is the id -> object dict used for circular reference
        detection, or None when that check is disabled.  Raises
        ``ValueError`` if ``lst`` is already being encoded further up.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON object text for ``dct`` chunk by chunk.

        Keys that are strings are used as they are (byte strings are
        decoded first when ``self.encoding`` is set to something other
        than 'utf-8').  Float, bool, None and integer keys are converted
        to their JSON text.  Any other key raises ``TypeError`` unless
        ``self.skipkeys`` is true, in which case the item is omitted.

        ``markers`` is the id -> object dict used for circular reference
        detection, or None when that check is disabled.  Raises
        ``ValueError`` if ``dct`` is already being encoded further up.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            # Sorting happens on the original keys, before any conversion
            # to text.
            keys = dct.keys()
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        _do_decode = (_encoding is not None
            and not (_encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # bool is a subclass of int, so the singletons have to be
            # tested before the integer branch; otherwise True/False keys
            # would be written as "True"/"False" instead of "true"/"false".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON text for an arbitrary object ``o`` chunk by chunk,
        dispatching on its type.  Objects of an unsupported type are
        passed through ``default()`` and the result is encoded instead.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # The bool singletons are checked before int because bool is a
        # subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``default()`` and return an iterator over the
        JSON text of the converted object.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        # A fresh markers dict per call keeps circular-reference tracking
        # local to this encoding; None disables the check.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
# Only the encoder class is exported by ``from <module> import *``.
__all__ = ['JSONEncoder']