]> jfr.im git - irc/quakenet/qwebirc.git/blame - esimplejson/encoder.py
more names into about
[irc/quakenet/qwebirc.git] / esimplejson / encoder.py
CommitLineData
4d256d41
CP
1"""
2Implementation of JSONEncoder
3"""
4import re
5
6try:
becfa850 7 from esimplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
4d256d41
CP
8except ImportError:
9 pass
10
# Matches every character that must be escaped inside a JSON string:
# the C0 control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches the backslash, the double quote and anything that is not in the
# printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the \x80-\xff range; used to decide whether a
# byte string needs decoding before it is escaped.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Escape table: characters with a short JSON escape come first ...
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# ... and every remaining control character falls back to a \uXXXX escape.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
# Function used to render ordinary (finite) floats.
FLOAT_REPR = repr
29
def floatstr(o, allow_nan=True):
    """
    Return the text used to represent the float ``o``.

    Finite values are rendered with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``NaN``, ``Infinity`` and ``-Infinity``
    when ``allow_nan`` is true; otherwise a ``ValueError`` is raised
    for them.
    """
    # Check for specials.  Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.
    if o != o:
        # NaN is the only value that compares unequal to itself.
        special = 'NaN'
    elif o == INFINITY:
        special = 'Infinity'
    elif o == -INFINITY:
        special = '-Infinity'
    else:
        special = None

    if special is None:
        return FLOAT_REPR(o)
    if allow_nan:
        return special
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
48
49
def encode_basestring(s):
    """
    Return a JSON representation of a Python string

    Only the characters matched by ``ESCAPE`` are replaced (through the
    ``ESCAPE_DCT`` table); everything else is passed through unchanged.
    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
57
58
def py_encode_basestring_ascii(s):
    """
    Return a JSON representation of a Python string in which every
    character matched by ``ESCAPE_ASCII`` is replaced by an escape.

    Byte strings containing characters in the \\x80-\\xff range are decoded
    as UTF-8 first.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Outside the BMP: emit a UTF-16 surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
77
78
# Use the C implementation when the optional speedups import above
# succeeded; otherwise fall back to the pure-Python encoder.
if 'c_encode_basestring_ascii' in globals():
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
83
84
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float, bool or
        None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion.  Otherwise, no such check takes
        place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # NOTE(review): this counter is shared instance state that the
        # container generators bump up and down while they run; an encode
        # that is abandoned or raises part-way through an indented
        # container leaves it non-zero for the next call.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the indentation for the current
        nesting level.  Only called when ``self.indent`` is not None.
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the chunks of the JSON array for the sequence ``lst``.

        ``markers`` is the id -> object dict used for circular reference
        detection, or None when that check is disabled.  Raises
        ``ValueError`` if ``lst`` is already being encoded further up.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            # Finished with this container: it may legitimately appear
            # again elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the chunks of the JSON object for the mapping ``dct``.

        Keys that are not strings are coerced: floats through
        ``floatstr``, True/False/None to ``true``/``false``/``null`` and
        integers through ``str``.  Any other key type is skipped when
        ``self.skipkeys`` is true and raises ``TypeError`` otherwise.

        ``markers`` is the id -> object dict used for circular reference
        detection, or None when that check is disabled.  Raises
        ``ValueError`` if ``dct`` is already being encoded further up.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            items = [(k, dct[k]) for k in sorted(dct.keys())]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # UTF-8 byte strings are handed to the string encoder undecoded.
        _do_decode = (_encoding is not None
            and not (_encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # The singleton checks must come before the integer check:
            # bool is a subclass of int, so True/False would otherwise be
            # rendered with str() as "True"/"False".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON chunks for ``o``, dispatching on its type.

        Objects of unsupported types are passed through ``self.default``
        (via ``_iterencode_default``) and the result is encoded instead.
        Raises ``ValueError`` on a circular reference when ``markers`` is
        not None.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # True/False are tested before the integer branch because bool is
        # a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``self.default`` and return the chunk iterator
        for the converted object.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            # Fresh id -> object map for this encoding run.
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
384
# Public API of this module: only the encoder class is exported.
__all__ = [
    'JSONEncoder',
]