2 Implementation of JSONEncoder
7 from simplejson
._speedups
import encode_basestring_ascii
as c_encode_basestring_ascii
11 ESCAPE
= re
.compile(r
'[\x00-\x1f\\"\b\f\n\r\t]')
12 ESCAPE_ASCII
= re
.compile(r
'([\\"]|[^\ -~])')
13 HAS_UTF8
= re
.compile(r
'[\x80-\xff]')
24 ESCAPE_DCT
.setdefault(chr(i
), '\\u%04x' % (i
,))
26 # Assume this produces an infinity on all machines (probably not guaranteed)
27 INFINITY
= float('1e66666')
30 def floatstr(o
, allow_nan
=True):
31 # Check for specials. Note that this type of test is processor- and/or
32 # platform-specific, so do tests which don't depend on the internals.
44 raise ValueError("Out of range float values are not JSON compliant: %r"
50 def encode_basestring(s
):
52 Return a JSON representation of a Python string
55 return ESCAPE_DCT
[match
.group(0)]
56 return '"' + ESCAPE
.sub(replace
, s
) + '"'
59 def py_encode_basestring_ascii(s
):
60 if isinstance(s
, str) and HAS_UTF8
.search(s
) is not None:
69 return '\\u%04x' % (n
,)
73 s1
= 0xd800 |
((n
>> 10) & 0x3ff)
74 s2
= 0xdc00 |
(n
& 0x3ff)
75 return '\\u%04x\\u%04x' % (s1
, s2
)
76 return '"' + str(ESCAPE_ASCII
.sub(replace
, s
)) + '"'
80 encode_basestring_ascii
= c_encode_basestring_ascii
82 encode_basestring_ascii
= py_encode_basestring_ascii
85 class JSONEncoder(object):
87 Extensible JSON <http://json.org> encoder for Python data structures.
89 Supports the following objects and types by default:
91 +-------------------+---------------+
93 +===================+===============+
95 +-------------------+---------------+
96 | list, tuple | array |
97 +-------------------+---------------+
98 | str, unicode | string |
99 +-------------------+---------------+
100 | int, long, float | number |
101 +-------------------+---------------+
103 +-------------------+---------------+
105 +-------------------+---------------+
107 +-------------------+---------------+
109 To extend this to recognize other objects, subclass and override the
110 ``.default()`` method with one that returns a serializable object for
111 ``o`` if possible; otherwise it should call the superclass
112 implementation (to raise ``TypeError``).
114 __all__
= ['__init__', 'default', 'encode', 'iterencode']
115 item_separator
= ', '
117 def __init__(self
, skipkeys
=False, ensure_ascii
=True,
118 check_circular
=True, allow_nan
=True, sort_keys
=False,
119 indent
=None, separators
=None, encoding
='utf-8', default
=None):
121 Constructor for JSONEncoder, with sensible defaults.
123 If skipkeys is False, then it is a TypeError to attempt
124 encoding of keys that are not str, int, long, float or None. If
125 skipkeys is True, such items are simply skipped.
127 If ensure_ascii is True, the output is guaranteed to be str
128 objects with all incoming unicode characters escaped. If
129 ensure_ascii is false, the output will be a unicode object.
131 If check_circular is True, then lists, dicts, and custom encoded
132 objects will be checked for circular references during encoding to
133 prevent an infinite recursion (which would cause an OverflowError).
134 Otherwise, no such check takes place.
136 If allow_nan is True, then NaN, Infinity, and -Infinity will be
137 encoded as such. This behavior is not JSON specification compliant,
138 but is consistent with most JavaScript based encoders and decoders.
139 Otherwise, it will be a ValueError to encode such floats.
141 If sort_keys is True, then the output of dictionaries will be
142 sorted by key; this is useful for regression tests to ensure
143 that JSON serializations can be compared on a day-to-day basis.
145 If indent is a non-negative integer, then JSON array
146 elements and object members will be pretty-printed with that
147 indent level. An indent level of 0 will only insert newlines.
148 None is the most compact representation.
150 If specified, separators should be an (item_separator, key_separator)
151 tuple. The default is (', ', ': '). To get the most compact JSON
152 representation you should specify (',', ':') to eliminate whitespace.
154 If specified, default is a function that gets called for objects
155 that can't otherwise be serialized. It should return a JSON encodable
156 version of the object or raise a ``TypeError``.
158 If encoding is not None, then all input strings will be
159 transformed into unicode using that encoding prior to JSON-encoding.
160 The default is UTF-8.
163 self
.skipkeys
= skipkeys
164 self
.ensure_ascii
= ensure_ascii
165 self
.check_circular
= check_circular
166 self
.allow_nan
= allow_nan
167 self
.sort_keys
= sort_keys
169 self
.current_indent_level
= 0
170 if separators
is not None:
171 self
.item_separator
, self
.key_separator
= separators
172 if default
is not None:
173 self
.default
= default
174 self
.encoding
= encoding
176 def _newline_indent(self
):
177 return '\n' + (' ' * (self
.indent
* self
.current_indent_level
))
179 def _iterencode_list(self
, lst
, markers
=None):
183 if markers
is not None:
185 if markerid
in markers
:
186 raise ValueError("Circular reference detected")
187 markers
[markerid
] = lst
189 if self
.indent
is not None:
190 self
.current_indent_level
+= 1
191 newline_indent
= self
._newline
_indent
()
192 separator
= self
.item_separator
+ newline_indent
195 newline_indent
= None
196 separator
= self
.item_separator
203 for chunk
in self
._iterencode
(value
, markers
):
205 if newline_indent
is not None:
206 self
.current_indent_level
-= 1
207 yield self
._newline
_indent
()
209 if markers
is not None:
210 del markers
[markerid
]
212 def _iterencode_dict(self
, dct
, markers
=None):
216 if markers
is not None:
218 if markerid
in markers
:
219 raise ValueError("Circular reference detected")
220 markers
[markerid
] = dct
222 key_separator
= self
.key_separator
223 if self
.indent
is not None:
224 self
.current_indent_level
+= 1
225 newline_indent
= self
._newline
_indent
()
226 item_separator
= self
.item_separator
+ newline_indent
229 newline_indent
= None
230 item_separator
= self
.item_separator
232 if self
.ensure_ascii
:
233 encoder
= encode_basestring_ascii
235 encoder
= encode_basestring
236 allow_nan
= self
.allow_nan
240 items
= [(k
, dct
[k
]) for k
in keys
]
242 items
= dct
.iteritems()
243 _encoding
= self
.encoding
244 _do_decode
= (_encoding
is not None
245 and not (_encoding
== 'utf-8'))
246 for key
, value
in items
:
247 if isinstance(key
, str):
249 key
= key
.decode(_encoding
)
250 elif isinstance(key
, basestring
):
252 # JavaScript is weakly typed for these, so it makes sense to
253 # also allow them. Many encoders seem to do something like this.
254 elif isinstance(key
, float):
255 key
= floatstr(key
, allow_nan
)
256 elif isinstance(key
, (int, long)):
267 raise TypeError("key %r is not a string" % (key
,))
274 for chunk
in self
._iterencode
(value
, markers
):
276 if newline_indent
is not None:
277 self
.current_indent_level
-= 1
278 yield self
._newline
_indent
()
280 if markers
is not None:
281 del markers
[markerid
]
283 def _iterencode(self
, o
, markers
=None):
284 if isinstance(o
, basestring
):
285 if self
.ensure_ascii
:
286 encoder
= encode_basestring_ascii
288 encoder
= encode_basestring
289 _encoding
= self
.encoding
290 if (_encoding
is not None and isinstance(o
, str)
291 and not (_encoding
== 'utf-8')):
292 o
= o
.decode(_encoding
)
300 elif isinstance(o
, (int, long)):
302 elif isinstance(o
, float):
303 yield floatstr(o
, self
.allow_nan
)
304 elif isinstance(o
, (list, tuple)):
305 for chunk
in self
._iterencode
_list
(o
, markers
):
307 elif isinstance(o
, dict):
308 for chunk
in self
._iterencode
_dict
(o
, markers
):
311 if markers
is not None:
313 if markerid
in markers
:
314 raise ValueError("Circular reference detected")
315 markers
[markerid
] = o
316 for chunk
in self
._iterencode
_default
(o
, markers
):
318 if markers
is not None:
319 del markers
[markerid
]
321 def _iterencode_default(self
, o
, markers
=None):
322 newobj
= self
.default(o
)
323 return self
._iterencode
(newobj
, markers
)
325 def default(self
, o
):
327 Implement this method in a subclass such that it returns
328 a serializable object for ``o``, or calls the base implementation
329 (to raise a ``TypeError``).
331 For example, to support arbitrary iterators, you could
332 implement default like this::
334 def default(self, o):
340 return list(iterable)
341 return JSONEncoder.default(self, o)
343 raise TypeError("%r is not JSON serializable" % (o
,))
347 Return a JSON string representation of a Python data structure.
349 >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
350 '{"foo": ["bar", "baz"]}'
352 # This is for extremely simple cases and benchmarks.
353 if isinstance(o
, basestring
):
354 if isinstance(o
, str):
355 _encoding
= self
.encoding
356 if (_encoding
is not None
357 and not (_encoding
== 'utf-8')):
358 o
= o
.decode(_encoding
)
359 if self
.ensure_ascii
:
360 return encode_basestring_ascii(o
)
362 return encode_basestring(o
)
363 # This doesn't pass the iterator directly to ''.join() because the
364 # exceptions aren't as detailed. The list call should be roughly
365 # equivalent to the PySequence_Fast that ''.join() would do.
366 chunks
= list(self
.iterencode(o
))
367 return ''.join(chunks
)
369 def iterencode(self
, o
):
371 Encode the given object and yield each string
372 representation as available.
376 for chunk in JSONEncoder().iterencode(bigobject):
377 mysocket.write(chunk)
379 if self
.check_circular
:
383 return self
._iterencode
(o
, markers
)
385 __all__
= ['JSONEncoder']