]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils/traversal.py
Add new options `--impersonate` and `--list-impersonate-targets`
[yt-dlp.git] / yt_dlp / utils / traversal.py
CommitLineData
69bec673 1import collections.abc
2import contextlib
3import inspect
4import itertools
5import re
ffbd4f2a 6import xml.etree.ElementTree
69bec673 7
8from ._utils import (
9 IDENTITY,
10 NO_DEFAULT,
11 LazyList,
0b6f829b 12 deprecation_warning,
69bec673 13 is_iterable_like,
14 try_call,
15 variadic,
16)
17
18
def traverse_obj(
        obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
        casesense=True, is_user_input=NO_DEFAULT, traverse_string=False):
    """
    Walk through nested `dict`s and `Iterable`s without raising on missing data

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    'value'

    The given `paths` are tried in order; the first one yielding a valid result wins.
    A path that branched but produced nothing also falls through to the next path.
    Objects that can be traversed are `Mapping`, `Iterable`, `re.Match`
    and `xml.etree.ElementTree.Element`.
    Results equal to `None` or `{}` count as "no value" and are dropped.

    Every path is passed through `variadic`, so `'key'` behaves like `('key', )`.

    A key inside a path may be any of the following:
        - `None`:           Yield the current object unchanged.
        - `set`:            Must contain exactly one item, a type or a function
                            (`{type}`/`{func}`). A type keeps the object only if it
                            is an instance of that type; a function yields `func(obj)`.
        - `str`/`int`:      Yield `obj[key]`. For `re.Match`, yield `obj.group(key)`.
                            For an `Element` with a `str` key, the key is used as an
                            xpath which may end in `/@attr`, `/@` or `/text()`.
        - `slice`:          Branch and yield every value of `obj[key]`.
        - `Ellipsis`:       Branch and yield every value of the object.
        - `tuple`/`list`:   Branch and yield the results of every contained branch.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch and yield the values accepted by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Iterable`s, `key` is the index of the value.
                            For `re.Match`es, `key` is the group number (0 = full match)
                            followed by any group names, if present.
        - `dict`:           Build a dict by traversing the object once per entry.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

    Nested paths and branches are supported inside `tuple`, `list` and `dict` keys.

    @params paths           Paths which to traverse by.
    @param default          Returned when none of the paths match.
                            If a path ends in a `dict` key, it is instead applied to each
                            value of that dict, depth first. Try to avoid if using nested `dict` keys.
    @param expected_type    If a `type`, only final values of this type are accepted.
                            Any other callable is called on each result instead.
                            If a path ends in a `dict` key, it is applied to each value of
                            that dict, recursively. This does respect branching paths.
    @param get_all          If `False`, return only the first match instead of all of them.
    @param casesense        If `False`, string dictionary keys are matched case insensitively.

    `traverse_string` is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API

    @param traverse_string  Whether objects may be traversed as strings.
                            If `True`, an otherwise incompatible object is converted
                            to a string first and that string is traversed.
                            Such a path then results in a string and
                            ignores any further branching.

    @returns                The result of the traversal.
                            If it succeeds with `get_all=True` and the path branched at least once,
                            a list of results is returned.
                            Without a `default`, a last path that branches always gives a `list`.
                            A path ending on a `dict` key always gives a `dict`.
    """
    if is_user_input is not NO_DEFAULT:
        deprecation_warning('The is_user_input parameter is deprecated and no longer works')

    def fold_case(name):
        # Only strings can be case folded; everything else is compared as is
        return name.casefold() if isinstance(name, str) else name

    if isinstance(expected_type, type):
        def check_type(value):
            return value if isinstance(value, expected_type) else None
    else:
        def check_type(value):
            return try_call(expected_type or IDENTITY, args=(value,))

    def apply_key(key, obj, is_last):
        # Returns `(branched, values)`; `values` is always an iterable of results.
        # NOTE: The order of the checks below is significant
        branched = False
        value = None

        if obj is None and traverse_string:
            if key is ... or callable(key) or isinstance(key, slice):
                branched, value = True, ()

        elif key is None:
            value = obj

        elif isinstance(key, set):
            assert len(key) == 1, 'Set should only be used to wrap a single item'
            wrapped = next(iter(key))
            if not isinstance(wrapped, type):
                value = try_call(wrapped, args=(obj,))
            elif isinstance(obj, wrapped):
                value = obj

        elif isinstance(key, (list, tuple)):
            branched = True
            # Kept lazy: each branch is only traversed once it is consumed
            value = itertools.chain.from_iterable(
                apply_path(obj, branch, is_last)[0] for branch in key)

        elif key is ...:
            if isinstance(obj, collections.abc.Mapping):
                branched, value = True, obj.values()
            elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
                branched, value = True, obj
            elif isinstance(obj, re.Match):
                branched, value = True, obj.groups()
            elif traverse_string:
                # String traversal yields a single string and does not branch
                value = str(obj)
            else:
                branched, value = True, ()

        elif callable(key):
            as_string = False
            if isinstance(obj, collections.abc.Mapping):
                pairs = obj.items()
            elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
                pairs = enumerate(obj)
            elif isinstance(obj, re.Match):
                pairs = itertools.chain(
                    enumerate((obj.group(), *obj.groups())),
                    obj.groupdict().items())
            elif traverse_string:
                as_string = True
                pairs = enumerate(str(obj))
            else:
                pairs = ()

            accepted = (v for k, v in pairs if try_call(key, args=(k, v)))
            if as_string:
                value = ''.join(accepted)
            else:
                branched, value = True, accepted

        elif isinstance(key, dict):
            collected = {}
            for name, subpath in key.items():
                found = _traverse_obj(obj, subpath, False, is_last)
                if found is not None:
                    collected[name] = found
                elif default is not NO_DEFAULT:
                    collected[name] = default
            # An empty dict is treated as no result
            value = collected or None

        elif isinstance(obj, collections.abc.Mapping):
            if casesense or try_call(obj.__contains__, args=(key,)):
                value = try_call(obj.get, args=(key,))
            else:
                # `key` was already case folded by `apply_path`
                value = next((v for k, v in obj.items() if fold_case(k) == key), None)

        elif isinstance(obj, re.Match):
            if isinstance(key, int) or casesense:
                with contextlib.suppress(IndexError):
                    value = obj.group(key)

            elif isinstance(key, str):
                value = next((v for k, v in obj.groupdict().items() if fold_case(k) == key), None)

        elif isinstance(key, (int, slice)):
            if is_iterable_like(obj, (collections.abc.Sequence, xml.etree.ElementTree.Element)):
                branched = isinstance(key, slice)
                with contextlib.suppress(IndexError):
                    value = obj[key]
            elif traverse_string:
                with contextlib.suppress(IndexError):
                    value = str(obj)[key]

        elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str):
            xpath, _, special = key.rpartition('/')
            if not (special.startswith('@') or special == 'text()'):
                # No special suffix, so the whole key is the xpath
                xpath = key
                special = None

            # Allow abbreviations of relative paths, absolute paths error
            if xpath.startswith('/'):
                xpath = f'.{xpath}'
            elif xpath and not xpath.startswith('./'):
                xpath = f'./{xpath}'

            def apply_specials(element):
                if special is None:
                    return element
                if special == '@':
                    return element.attrib
                if special.startswith('@'):
                    return try_call(element.attrib.get, args=(special[1:],))
                if special == 'text()':
                    return element.text
                assert False, f'apply_specials is missing case for {special!r}'

            if xpath:
                value = [apply_specials(element) for element in obj.iterfind(xpath)]
            else:
                value = apply_specials(obj)

        if branched:
            return True, value
        return False, (value,)

    def lazy_last(iterable):
        # Yields `(is_last, item)` pairs while only looking one item ahead
        items = iter(iterable)
        pending = next(items, NO_DEFAULT)
        if pending is NO_DEFAULT:
            return

        for upcoming in items:
            yield False, pending
            pending = upcoming

        yield True, pending

    def apply_path(start_obj, path, test_type):
        # Returns `(results, has_branched, ended_on_dict_key)`
        current = (start_obj,)
        has_branched = False

        # Stays `None` for an empty path; inspected again after the loop
        key = None
        for is_final, key in lazy_last(variadic(path, (str, bytes, dict, set))):
            if isinstance(key, str) and not casesense:
                key = key.casefold()

            if __debug__ and callable(key):
                # Verify function signature
                inspect.signature(key).bind(None, None)

            expanded = []
            for item in current:
                branched, found = apply_key(key, item, is_final)
                if branched:
                    has_branched = True
                expanded.append(found)

            current = itertools.chain.from_iterable(expanded)

        if test_type and not isinstance(key, (dict, list, tuple)):
            current = map(check_type, current)

        return current, has_branched, isinstance(key, dict)

    def _traverse_obj(obj, path, allow_empty, test_type):
        results, has_branched, is_dict = apply_path(obj, path, test_type)
        results = LazyList(item for item in results if item not in (None, {}))

        if not (get_all and has_branched):
            if results:
                return results[0]
            return {} if allow_empty and is_dict else None

        if results:
            return results.exhaust()
        if not allow_empty:
            return None
        return [] if default is NO_DEFAULT else default

    final_index = len(paths)
    for position, path in enumerate(paths, 1):
        # Only the last path may produce an "empty" result
        found = _traverse_obj(obj, path, position == final_index, True)
        if found is not None:
            return found

    return None if default is NO_DEFAULT else default
266
267
def get_first(obj, *paths, **kwargs):
    """
    Call `traverse_obj` with `...` prepended to each of `paths` and `get_all=False`

    Every entry of `paths` is wrapped in `variadic` before `...` is prepended.
    Any `kwargs` are forwarded to `traverse_obj` unchanged.
    """
    branched_paths = [(..., *variadic(keys)) for keys in paths]
    return traverse_obj(obj, *branched_paths, **kwargs, get_all=False)
270
271
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """
    Return the first usable value found in `d` for any of the given keys

    @param key_or_keys          A single key or several keys, tried in order
                                (wrapped using `variadic`).
    @param default              Returned if no key gives a usable value.
    @param skip_false_values    If `True`, falsy values are skipped as well;
                                `None` values are always skipped.
    """
    lookup = d.get
    for key in variadic(key_or_keys):
        val = lookup(key)
        if val is None:
            continue
        if not val and skip_false_values:
            continue
        return val
    return default