# Extracted from: jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_vendor/webencodings/tests.py
"""
A basic test suite for Encoding.

:copyright: Copyright 2012 by Simon Sapin
:license: BSD, see LICENSE for details.
"""
14 from __future__
import unicode_literals
16 from . import (lookup
, LABELS
, decode
, encode
, iter_decode
, iter_encode
,
17 IncrementalDecoder
, IncrementalEncoder
, UTF8
)
def assert_raises(exception, function, *args, **kwargs):
    """Assert that ``function(*args, **kwargs)`` raises ``exception``.

    :param exception: Exception class (or tuple of classes) that is expected.
    :param function: Callable to invoke.
    :param args: Positional arguments forwarded to ``function``.
    :param kwargs: Keyword arguments forwarded to ``function``.
    :raises AssertionError: If the call returns without raising.

    Exceptions that do not match ``exception`` propagate unchanged.
    """
    try:
        function(*args, **kwargs)
    except exception:
        return
    else:  # pragma: no cover
        # Wrap in a 1-tuple so that a tuple of exception classes is formatted
        # as one value instead of being unpacked by the ``%`` operator.
        raise AssertionError('Did not raise %s.' % (exception,))
def test_labels():
    """Check how ``lookup`` resolves labels.

    Covers ASCII case-insensitivity, stripping of ASCII whitespace, and
    labels that must not resolve (``lookup`` returns ``None`` for them).
    """
    # NOTE(review): the ``def`` line for these assertions is absent from the
    # extracted text; the name follows the other ``test_*`` functions in this
    # file -- confirm against the upstream module.
    assert lookup('utf-8').name == 'utf-8'
    assert lookup('Utf-8').name == 'utf-8'
    assert lookup('UTF-8').name == 'utf-8'
    assert lookup('utf8').name == 'utf-8'
    assert lookup('utf8').name == 'utf-8'
    assert lookup('utf8 ').name == 'utf-8'
    assert lookup(' \r\nutf8\t').name == 'utf-8'
    assert lookup('u8') is None  # Python label.
    # U+00A0 is spelled as an escape so the non-ASCII space cannot be
    # silently normalized into an ASCII space (which would be stripped).
    assert lookup('utf-8\u00a0') is None  # Non-ASCII white space.

    assert lookup('US-ASCII').name == 'windows-1252'
    assert lookup('iso-8859-1').name == 'windows-1252'
    assert lookup('latin1').name == 'windows-1252'
    assert lookup('LATIN1').name == 'windows-1252'
    assert lookup('latin-1') is None
    assert lookup('LATİN1') is None  # ASCII-only case insensitivity.
def test_all_labels():
    """Push empty input through every entry point for each known label.

    For each label the one-shot, iterator-based and incremental APIs must
    all map empty input to empty output.  Afterwards every encoding name in
    ``LABELS`` is checked to resolve to itself.
    """
    # NOTE(review): the loop header binding ``label`` was missing from the
    # extracted text; iterating the keys of ``LABELS`` is the reading that
    # matches the ``LABELS.values()`` loop below -- confirm against upstream.
    for label in LABELS:
        assert decode(b'', label) == ('', lookup(label))
        assert encode('', label) == b''
        for repeat in [0, 1, 12]:
            output, _ = iter_decode([b''] * repeat, label)
            assert list(output) == []
            assert list(iter_encode([''] * repeat, label)) == []
        decoder = IncrementalDecoder(label)
        assert decoder.decode(b'') == ''
        assert decoder.decode(b'', final=True) == ''
        encoder = IncrementalEncoder(label)
        assert encoder.encode('') == b''
        assert encoder.encode('', final=True) == b''
    # All encoding names are valid labels too:
    for name in set(LABELS.values()):
        assert lookup(name).name == name
def test_invalid_label():
    """Each decode/encode entry point must raise LookupError for 'invalid'."""
    bad_label = 'invalid'
    # (callable, positional arguments) pairs, checked in this order.
    calls = [
        (decode, (b'\xEF\xBB\xBF\xc3\xa9', bad_label)),
        (encode, ('é', bad_label)),
        (iter_decode, ([], bad_label)),
        (iter_encode, ([], bad_label)),
        (IncrementalDecoder, (bad_label,)),
        (IncrementalEncoder, (bad_label,)),
    ]
    for func, call_args in calls:
        assert_raises(LookupError, func, *call_args)
def test_decode():
    """Check one-shot ``decode``: fallback labels, BOM sniffing, UTF-16."""
    # NOTE(review): the ``def`` line for these assertions is absent from the
    # extracted text; the name follows the other ``test_*`` functions in this
    # file -- confirm against the upstream module.
    assert decode(b'\x80', 'latin1') == ('€', lookup('latin1'))
    assert decode(b'\x80', lookup('latin1')) == ('€', lookup('latin1'))
    assert decode(b'\xc3\xa9', 'utf8') == ('é', lookup('utf8'))
    assert decode(b'\xc3\xa9', UTF8) == ('é', lookup('utf8'))
    # Without a BOM the fallback is used, so the two UTF-8 bytes decode to
    # two separate characters (U+00C3 U+00A9), not to a single U+00E9.
    # Escapes keep text clean-up tools from "repairing" the expected value.
    assert decode(b'\xc3\xa9', 'ascii') == ('\xc3\xa9', lookup('ascii'))
    assert decode(b'\xEF\xBB\xBF\xc3\xa9', 'ascii') == ('é', lookup('utf8'))  # UTF-8 with BOM

    assert decode(b'\xFE\xFF\x00\xe9', 'ascii') == ('é', lookup('utf-16be'))  # UTF-16-BE with BOM
    assert decode(b'\xFF\xFE\xe9\x00', 'ascii') == ('é', lookup('utf-16le'))  # UTF-16-LE with BOM
    assert decode(b'\xFE\xFF\xe9\x00', 'ascii') == ('\ue900', lookup('utf-16be'))
    assert decode(b'\xFF\xFE\x00\xe9', 'ascii') == ('\ue900', lookup('utf-16le'))

    assert decode(b'\x00\xe9', 'UTF-16BE') == ('é', lookup('utf-16be'))
    assert decode(b'\xe9\x00', 'UTF-16LE') == ('é', lookup('utf-16le'))
    assert decode(b'\xe9\x00', 'UTF-16') == ('é', lookup('utf-16le'))

    assert decode(b'\xe9\x00', 'UTF-16BE') == ('\ue900', lookup('utf-16be'))
    assert decode(b'\x00\xe9', 'UTF-16LE') == ('\ue900', lookup('utf-16le'))
    assert decode(b'\x00\xe9', 'UTF-16') == ('\ue900', lookup('utf-16le'))
def test_encode():
    """Check one-shot ``encode`` for latin1, UTF-8 and the UTF-16 labels."""
    # NOTE(review): the ``def`` line for these assertions is absent from the
    # extracted text; the name follows the other ``test_*`` functions in this
    # file -- confirm against the upstream module.
    assert encode('é', 'latin1') == b'\xe9'
    assert encode('é', 'utf8') == b'\xc3\xa9'
    assert encode('é', 'utf8') == b'\xc3\xa9'
    # The plain 'utf-16' label is expected to produce little-endian output.
    assert encode('é', 'utf-16') == b'\xe9\x00'
    assert encode('é', 'utf-16le') == b'\xe9\x00'
    assert encode('é', 'utf-16be') == b'\x00\xe9'
def test_iter_decode():
    """Check ``iter_decode`` over chunked input, including split BOMs."""

    def iter_decode_to_string(chunks, fallback_encoding):
        """Decode ``chunks`` and join the output pieces into one string."""
        output, _encoding = iter_decode(chunks, fallback_encoding)
        return ''.join(output)

    assert iter_decode_to_string([], 'latin1') == ''
    assert iter_decode_to_string([b''], 'latin1') == ''
    assert iter_decode_to_string([b'\xe9'], 'latin1') == 'é'
    assert iter_decode_to_string([b'hello'], 'latin1') == 'hello'
    assert iter_decode_to_string([b'he', b'llo'], 'latin1') == 'hello'
    assert iter_decode_to_string([b'hell', b'o'], 'latin1') == 'hello'
    # No BOM, so the latin1 fallback applies and each byte becomes its own
    # character (U+00C3 U+00A9).  Escapes keep text clean-up tools from
    # "repairing" the expected value into a single U+00E9.
    assert iter_decode_to_string([b'\xc3\xa9'], 'latin1') == '\xc3\xa9'
    assert iter_decode_to_string([b'\xEF\xBB\xBF\xc3\xa9'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'\xEF\xBB\xBF', b'\xc3', b'\xa9'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'\xEF\xBB\xBF', b'a', b'\xc3'], 'latin1') == 'a\uFFFD'
    assert iter_decode_to_string([
        b'', b'\xEF', b'', b'', b'\xBB\xBF\xc3', b'\xa9'], 'latin1') == 'é'
    assert iter_decode_to_string([b'\xEF\xBB\xBF'], 'latin1') == ''
    # An incomplete UTF-8 BOM is ordinary data for the fallback encoding.
    assert iter_decode_to_string([b'\xEF\xBB'], 'latin1') == 'ï»'
    assert iter_decode_to_string([b'\xFE\xFF\x00\xe9'], 'latin1') == 'é'
    assert iter_decode_to_string([b'\xFF\xFE\xe9\x00'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'', b'\xFF', b'', b'', b'\xFE\xe9', b'\x00'], 'latin1') == 'é'
    assert iter_decode_to_string([
        b'', b'h\xe9', b'llo'], 'x-user-defined') == 'h\uF7E9llo'
def test_iter_encode():
    """Check ``iter_encode`` by joining its output for several labels."""

    def joined(pieces, label):
        # Collapse the encoded chunks into a single byte string.
        return b''.join(iter_encode(pieces, label))

    padded = ['', 'é', '', '']

    assert joined([], 'latin1') == b''
    assert joined([''], 'latin1') == b''
    assert joined(['é'], 'latin1') == b'\xe9'
    assert joined(list(padded), 'latin1') == b'\xe9'
    assert joined(list(padded), 'utf-16') == b'\xe9\x00'
    assert joined(list(padded), 'utf-16le') == b'\xe9\x00'
    assert joined(list(padded), 'utf-16be') == b'\x00\xe9'
    assert joined(['', 'h\uF7E9', '', 'llo'], 'x-user-defined') == b'h\xe9llo'
def test_x_user_defined():
    """Round-trip a fixed byte string through the 'x-user-defined' label."""
    label = 'x-user-defined'
    raw_bytes = b'2,\x0c\x0b\x1aO\xd9#\xcb\x0f\xc9\xbbt\xcf\xa8\xca'
    # Same data as text: the expected string has a U+F7xx code point for
    # every byte >= 0x80 in ``raw_bytes``.
    text = '2,\x0c\x0b\x1aO\uf7d9#\uf7cb\x0f\uf7c9\uf7bbt\uf7cf\uf7a8\uf7ca'

    expected_decode = (text, lookup(label))
    assert decode(raw_bytes, label) == expected_decode
    assert encode(text, label) == raw_bytes