2 import xml
.etree
.ElementTree
6 from yt_dlp
.utils
import dict_get
, int_or_none
, str_or_none
7 from yt_dlp
.utils
.traversal
import traverse_obj
16 {'index': 0, 'url': 'https://www.example.com/0'}
,
17 {'index': 1, 'url': 'https://www.example.com/1'}
,
28 def test_dict_get(self
):
36 d
= {**FALSE_VALUES, 'a': 42}
37 assert dict_get(d
, 'a') == 42
38 assert dict_get(d
, 'b') is None
39 assert dict_get(d
, 'b', 42) == 42
40 assert dict_get(d
, ('a',)) == 42
41 assert dict_get(d
, ('b', 'a')) == 42
42 assert dict_get(d
, ('b', 'c', 'a', 'd')) == 42
43 assert dict_get(d
, ('b', 'c')) is None
44 assert dict_get(d
, ('b', 'c'), 42) == 42
45 for key
, false_value
in FALSE_VALUES
.items():
46 assert dict_get(d
, ('b', 'c', key
)) is None
47 assert dict_get(d
, ('b', 'c', key
), skip_false_values
=False) == false_value
49 def test_traversal_base(self
):
50 assert traverse_obj(_TEST_DATA
, ('str',)) == 'str', \
52 assert traverse_obj(_TEST_DATA
, ['str']) == 'str', \
54 assert traverse_obj(_TEST_DATA
, (value
for value
in ("str",))) == 'str', \
56 assert traverse_obj(_TEST_DATA
, 'str') == 'str', \
57 'single items should be treated as a path'
58 assert traverse_obj(_TEST_DATA
, 100) == 100, \
60 assert traverse_obj(_TEST_DATA
, 1.2) == 1.2, \
62 assert traverse_obj(_TEST_DATA
, None) == _TEST_DATA
, \
63 '`None` should not perform any modification'
def test_traversal_ellipsis(self):
    """Check how an `...` key branches over the values of the traversed object."""
    kept_values = [value for value in _TEST_DATA.values() if value not in (None, {})]
    assert traverse_obj(_TEST_DATA, ...) == kept_values, (
        '`...` should give all non discarded values')

    first_entry_values = list(_TEST_DATA['urls'][0].values())
    assert traverse_obj(_TEST_DATA, ('urls', 0, ...)) == first_entry_values, (
        '`...` selection for dicts should select all values')

    every_url = ['https://www.example.com/0', 'https://www.example.com/1']
    assert traverse_obj(_TEST_DATA, (..., ..., 'url')) == every_url, (
        'nested `...` queries should work')

    zero_to_three = list(range(4))
    assert traverse_obj(_TEST_DATA, (..., ..., 'index')) == zero_to_three, (
        '`...` query result should be flattened')
    # A one-shot iterator is passed on purpose instead of a sequence
    assert traverse_obj(iter(range(4)), ...) == zero_to_three, (
        '`...` should accept iterables')
def test_traversal_function(self):
    """Check that a callable key acts as a `(key, value)` filter."""

    def is_urls_list(key, value):
        return key == 'urls' and isinstance(value, list)

    def first_item_is_str(_, value):
        # May raise for some values; the assertion below expects such errors to be swallowed
        return isinstance(value[0], str)

    def is_even(_, number):
        return number % 2 == 0

    assert traverse_obj(_TEST_DATA, is_urls_list) == [_TEST_DATA['urls']], (
        'function as query key should perform a filter based on (key, value)')
    assert traverse_obj(_TEST_DATA, first_item_is_str) == ['str'], (
        'exceptions in the query function should be catched')
    assert traverse_obj(iter(range(4)), is_even) == [0, 2], (
        'function key should accept iterables')

    # Wrong function signature should raise (debug mode)
    too_few_params = lambda only: ...  # noqa: E731
    too_many_params = lambda first, second, third: ...  # noqa: E731
    with pytest.raises(Exception):
        traverse_obj(_TEST_DATA, too_few_params)
    with pytest.raises(Exception):
        traverse_obj(_TEST_DATA, too_many_params)
def test_traversal_set(self):
    """Check single-item sets used as transformation / type-filter keys."""
    # transformation/type, like `expected_type`
    uppercased = traverse_obj(_TEST_DATA, (..., {str.upper}, ))
    assert uppercased == ['STR'], 'Function in set should be a transformation'

    only_strings = traverse_obj(_TEST_DATA, (..., {str}))
    assert only_strings == ['str'], 'Type in set should be a type filter'

    whole_object = traverse_obj(_TEST_DATA, {dict})
    assert whole_object == _TEST_DATA, 'A single set should be wrapped into a path'

    uppercased_again = traverse_obj(_TEST_DATA, (..., {str.upper}))
    assert uppercased_again == ['STR'], 'Transformation function should not raise'

    # Build the reference result by hand: stringify every value, drop the `None` results
    stringified = []
    for value in _TEST_DATA.values():
        text = str_or_none(value)
        if text is not None:
            stringified.append(text)
    assert traverse_obj(_TEST_DATA, (..., {str_or_none})) == stringified, (
        'Function in set should be a transformation')

    constant = traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'}))
    assert constant == 'const', 'Function in set should always be called'

    # Sets with length != 1 should raise in debug
    with pytest.raises(Exception):
        traverse_obj(_TEST_DATA, set())
    with pytest.raises(Exception):
        traverse_obj(_TEST_DATA, {str.upper, str})
def test_traversal_slice(self):
    """Check `slice` objects used as keys."""
    numbers = [0, 1, 2, 3, 4]

    sliced_dict = traverse_obj(_TEST_DATA, ('dict', slice(1)))
    assert sliced_dict is None, 'slice on a dictionary should not throw'

    # Each slice key must give the same result as indexing the list with it directly
    for key in (slice(1), slice(1, 2), slice(1, 4, 2)):
        assert traverse_obj(numbers, key) == numbers[key], (
            'slice key should apply slice to sequence')
def test_traversal_alternatives(self):
    """Check that several positional paths are tried as alternatives, in order."""
    second_path_hit = traverse_obj(_TEST_DATA, 'fail', 'str')
    assert second_path_hit == 'str', (
        'multiple `paths` should be treated as alternative paths')

    first_path_hit = traverse_obj(_TEST_DATA, 'str', 100)
    assert first_path_hit == 'str', 'alternatives should exit early'

    nothing_hit = traverse_obj(_TEST_DATA, 'fail', 'fail')
    assert nothing_hit is None, 'alternatives should return `default` if exhausted'

    after_failed_branch = traverse_obj(_TEST_DATA, (..., 'fail'), 100)
    assert after_failed_branch == 100, (
        'alternatives should track their own branching return')

    after_empty_branch = traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...))
    assert after_empty_branch == list(_TEST_DATA['data']), (
        'alternatives on empty objects should search further')
def test_traversal_branching_nesting(self):
    """Check tuples/lists nested inside a path as branches and sub-paths."""
    first_url_only = ['https://www.example.com/0']
    both_urls = ['https://www.example.com/0', 'https://www.example.com/1']

    assert traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')) == first_url_only, (
        'tuple as key should be treated as branches')
    assert traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')) == first_url_only, (
        'list as key should be treated as branches')
    assert traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))) == first_url_only, (
        'double nesting in path should be treated as paths')

    # Self-contained input: the first branch yields a str, the second a list
    assert traverse_obj(['0', [1, 2]], [(0, 1), 0]) == [1], (
        'do not fail early on branching')

    assert traverse_obj(_TEST_DATA, ('urls', ((0, ('fail', 'url')), (1, 'url')))) == both_urls, (
        'tripple nesting in path should be treated as branches')
    assert traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))) == both_urls, (
        'ellipsis as branch path start gets flattened')
def test_traversal_dict(self):
    """Check dict keys, which build a result dict from one path per key."""

    def pick(template, **options):
        # Shorthand for traversing the shared test data with a dict key
        return traverse_obj(_TEST_DATA, template, **options)

    assert pick({0: 100, 1: 1.2}) == {0: 100, 1: 1.2}, (
        'dict key should result in a dict with the same keys')

    single_url = {0: 'https://www.example.com/0'}
    assert pick({0: ('urls', 0, 'url')}) == single_url, (
        'dict key should allow paths')

    branched_url = {0: ['https://www.example.com/0']}
    assert pick({0: ('urls', (3, 0), 'url')}) == branched_url, (
        'tuple in dict path should be treated as branches')
    assert pick({0: ('urls', ((1, 'fail'), (0, 'url')))}) == branched_url, (
        'double nesting in dict path should be treated as paths')

    reversed_urls = {0: ['https://www.example.com/1', 'https://www.example.com/0']}
    assert pick({0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}) == reversed_urls, (
        'tripple nesting in dict path should be treated as branches')

    assert pick({0: 'fail'}) == {}, (
        'remove `None` values when top level dict key fails')
    assert pick({0: 'fail'}, default=...) == {0: ...}, (
        'use `default` if key fails and `default`')
    assert pick({0: 'dict'}) == {}, (
        'remove empty values when dict key')
    assert pick({0: 'dict'}, default=...) == {0: ...}, (
        'use `default` when dict key and `default`')
    assert pick({0: {0: 'fail'}}) == {}, (
        'remove empty values when nested dict key fails')

    # These two traverse `None` rather than the shared test data
    assert traverse_obj(None, {0: 'fail'}) == {}, (
        'default to dict if pruned')
    assert traverse_obj(None, {0: 'fail'}, default=...) == {0: ...}, (
        'default to dict if pruned and default is given')

    assert pick({0: {0: 'fail'}}, default=...) == {0: {0: ...}}, (
        'use nested `default` when nested dict key fails and `default`')
    assert pick({0: ('dict', ...)}) == {}, (
        'remove key if branch in dict key not successful')
def test_traversal_default(self):
    """Check when `default` is returned versus an empty list for branching paths."""
    sample = {'None': None, 'int': 0, 'list': []}

    def lookup(*paths, **options):
        # Shorthand for traversing the local sample mapping
        return traverse_obj(sample, *paths, **options)

    assert lookup('fail') is None, (
        'default value should be `None`')
    assert lookup('fail', 'fail', default=...) == ..., (
        'chained fails should result in default')
    assert lookup('None', 'int') == 0, (
        'should not short cirquit on `None`')
    assert lookup('fail', default=1) == 1, (
        'invalid dict key should result in `default`')
    assert lookup('None', default=1) == 1, (
        '`None` is a deliberate sentinel and should become `default`')
    assert lookup(('list', 10)) is None, (
        '`IndexError` should result in `default`')

    failing_branch = (..., 'fail')
    assert lookup(failing_branch, default=1) == 1, (
        'if branched but not successful return `default` if defined, not `[]`')
    assert lookup(failing_branch, default=None) is None, (
        'if branched but not successful return `default` even if `default` is `None`')
    assert lookup(failing_branch) == [], (
        'if branched but not successful return `[]`, not `default`')
    assert lookup(('list', ...)) == [], (
        'if branched but object is empty return `[]`, not `default`')

    # The remaining checks use their own inputs instead of the sample mapping
    assert traverse_obj(None, ...) == [], (
        'if branched but object is `None` return `[]`, not `default`')
    assert traverse_obj({0: None}, (0, ...)) == [], (
        'if branched but state is `None` return `[]`, not `default`')
212 @pytest.mark.parametrize('path', [
215 100 * ('fail',) + (...,),
216 (...,) + 100 * ('fail',),
def test_traversal_branching(self, path):
    """Check results for a branching `path` combined with alternative paths."""
    empty = {}
    single = {0: 'x'}

    branched_alone = traverse_obj(empty, path)
    assert branched_alone == [], (
        'if branched but state is `None`, return `[]` (not `default`)')

    branched_last_no_match = traverse_obj(empty, 'fail', path)
    assert branched_last_no_match == [], (
        'if branching in last alternative and previous did not match, return `[]` (not `default`)')

    branched_last_after_match = traverse_obj(single, 0, path)
    assert branched_last_after_match == 'x', (
        'if branching in last alternative and previous did match, return single value')

    branched_first_then_match = traverse_obj(single, path, 0)
    assert branched_first_then_match == 'x', (
        'if branching in first alternative and non-branching path does match, return single value')

    branched_first_then_fail = traverse_obj(empty, path, 'fail')
    assert branched_first_then_fail is None, (
        'if branching in first alternative and non-branching path does not match, return `default`')
def test_traversal_expected_type(self):
    """Check `expected_type` given as a type (filter) or as a function (transformation)."""
    sample = {'str': 'str', 'int': 0}

    # Plain functions on purpose: a function and a type passed as
    # `expected_type` are asserted to behave differently below.
    def stringify(value):
        return str(value)

    def always_raises(_):
        return 1 / 0

    assert traverse_obj(sample, 'str', expected_type=str) == 'str', (
        'accept matching `expected_type` type')
    assert traverse_obj(sample, 'str', expected_type=int) is None, (
        'reject non matching `expected_type` type')
    assert traverse_obj(sample, 'int', expected_type=stringify) == '0', (
        'transform type using type function')
    assert traverse_obj(sample, 'str', expected_type=always_raises) is None, (
        'wrap expected_type fuction in try_call')
    assert traverse_obj(sample, ..., expected_type=str) == ['str'], (
        'eliminate items that expected_type fails on')

    filtered = traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int)
    assert filtered == {0: 100}, (
        'type as expected_type should filter dict values')

    transformed = traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none)
    assert transformed == {0: '100', 1: '1.2'}, (
        'function as expected_type should transform dict values')

    through_dict = traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int)
    assert through_dict == 1, (
        'expected_type should not filter non final dict values')

    nested = traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int)
    assert nested == {0: {0: 100}}, (
        'expected_type should transform deep dict values')

    branched = traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...))
    assert branched == [{0: ...}, {0: ...}], (
        'expected_type should transform branched dict values')

    tuple_branch = traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int)
    assert tuple_branch == [4], (
        'expected_type regression for type matching in tuple branching')

    dict_result = traverse_obj(_TEST_DATA, ['data', ...], expected_type=int)
    assert dict_result == [], (
        'expected_type regression for type matching in dict result')
def test_traversal_get_all(self):
    """Check `get_all=False` on branching paths."""
    sample = {'key': [0, 1, 2]}

    first_only = traverse_obj(sample, ('key', ...), get_all=False)
    assert first_only == 0, 'if not `get_all`, return only first matching value'

    first_branch = traverse_obj(sample, ..., get_all=False)
    assert first_branch == [0, 1, 2], 'do not overflatten if not `get_all`'
266 def test_traversal_casesense(self
):
271 0: {'KeY': 'value2'}
,
275 assert traverse_obj(_CASESENSE_DATA
, 'key') is None, \
276 'dict keys should be case sensitive unless `casesense`'
277 assert traverse_obj(_CASESENSE_DATA
, 'keY', casesense
=False) == 'value0', \
278 'allow non matching key case if `casesense`'
279 assert traverse_obj(_CASESENSE_DATA
, [0, ('keY',)], casesense
=False) == ['value1'], \
280 'allow non matching key case in branch if `casesense`'
281 assert traverse_obj(_CASESENSE_DATA
, [0, ([0, 'keY'],)], casesense
=False) == ['value2'], \
282 'allow non matching key case in branch path if `casesense`'
def test_traversal_traverse_string(self):
    """Check traversal into strings when `traverse_string` is enabled."""
    sample = {'str': 'str', 1.2: 1.2}

    def into_strings(obj, path):
        # Every call after the first assertion enables `traverse_string`
        return traverse_obj(obj, path, traverse_string=True)

    assert traverse_obj(sample, ('str', 0)) is None, (
        'do not traverse into string if not `traverse_string`')
    assert into_strings(sample, ('str', 0)) == 's', (
        'traverse into string if `traverse_string`')
    assert into_strings(sample, (1.2, 1)) == '.', (
        'traverse into converted data if `traverse_string`')
    assert into_strings(sample, ('str', ...)) == 'str', (
        '`...` should result in string (same value) if `traverse_string`')
    assert into_strings(sample, ('str', slice(0, None, 2))) == 'sr', (
        '`slice` should result in string if `traverse_string`')
    assert into_strings(sample, ('str', lambda index, char: index or char == "s")) == 'str', (
        'function should result in string if `traverse_string`')
    assert into_strings(sample, ('str', (0, 2))) == ['s', 'r'], (
        'branching should result in list if `traverse_string`')

    # Branching keys applied after a failed lookup on an empty dict
    for branching_key in (..., lambda x, y: True, slice(1)):
        assert into_strings({}, (0, branching_key)) == [], (
            'branching should result in list if `traverse_string`')
def test_traversal_re(self):
    """Check traversal of a `re.Match` by index, group name, `...` and function keys."""
    match = re.compile(r'0(12)(?P<group>3)(4)?').fullmatch('0123')

    matched_groups = [group for group in match.groups() if group is not None]
    assert traverse_obj(match, ...) == matched_groups, (
        '`...` on a `re.Match` should give its `groups()`')

    by_number = traverse_obj(match, lambda key, _: key in (0, 2))
    assert by_number == ['0123', '3'], (
        'function on a `re.Match` should give groupno, value starting at 0')

    assert traverse_obj(match, 'group') == '3', (
        'str key on a `re.Match` should give group with that name')
    assert traverse_obj(match, 2) == '3', (
        'int key on a `re.Match` should give group with that name')
    assert traverse_obj(match, 'gRoUp', casesense=False) == '3', (
        'str key on a `re.Match` should respect casesense')

    # Lookups that are expected to miss
    assert traverse_obj(match, 'fail') is None, (
        'failing str key on a `re.Match` should return `default`')
    assert traverse_obj(match, 'gRoUpS', casesense=False) is None, (
        'failing str key on a `re.Match` should return `default`')
    assert traverse_obj(match, 8) is None, (
        'failing int key on a `re.Match` should return `default`')

    by_number_or_name = traverse_obj(match, lambda key, _: key in (0, 'group'))
    assert by_number_or_name == ['0123', '3'], (
        'function on a `re.Match` should give group name as well')
329 def test_traversal_xml_etree(self
):
330 etree
= xml
.etree
.ElementTree
.fromstring('''<?xml version="1.0"?>
332 <country name="Liechtenstein">
335 <gdppc>141100</gdppc>
336 <neighbor name="Austria" direction="E"/>
337 <neighbor name="Switzerland" direction="W"/>
339 <country name="Singapore">
343 <neighbor name="Malaysia" direction="N"/>
345 <country name="Panama">
349 <neighbor name="Costa Rica" direction="W"/>
350 <neighbor name="Colombia" direction="E"/>
353 assert traverse_obj(etree
, '') == etree
, \
354 'empty str key should return the element itself'
355 assert traverse_obj(etree
, 'country') == list(etree
), \
356 'str key should lead all children with that tag name'
357 assert traverse_obj(etree
, ...) == list(etree
), \
358 '`...` as key should return all children'
359 assert traverse_obj(etree
, lambda _
, x
: x
[0].text
== '4') == [etree
[1]], \
360 'function as key should get element as value'
361 assert traverse_obj(etree
, lambda i
, _
: i
== 1) == [etree
[1]], \
362 'function as key should get index as key'
363 assert traverse_obj(etree
, 0) == etree
[0], \
364 'int key should return the nth child'
365 expected
= ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia']
366 assert traverse_obj(etree
, './/neighbor/@name') == expected
, \
367 '`@<attribute>` at end of path should give that attribute'
368 assert traverse_obj(etree
, '//neighbor/@fail') == [None, None, None, None, None], \
369 '`@<nonexistant>` at end of path should give `None`'
370 assert traverse_obj(etree
, ('//neighbor/@', 2)) == {'name': 'Malaysia', 'direction': 'N'}
, \
371 '`@` should give the full attribute dict'
372 assert traverse_obj(etree
, '//year/text()') == ['2008', '2011', '2011'], \
373 '`text()` at end of path should give the inner text'
374 assert traverse_obj(etree
, '//*[@direction]/@direction') == ['E', 'W', 'N', 'W', 'E'], \
375 'full Python xpath features should be supported'
376 assert traverse_obj(etree
, (0, '@name')) == 'Liechtenstein', \
377 'special transformations should act on current element'
378 assert traverse_obj(etree
, ('country', 0, ..., 'text()', {int_or_none}
)) == [1, 2008, 141100], \
379 'special transformations should act on current element'