import calendar
import codecs
import collections
+import collections.abc
import contextlib
import datetime
import email.header
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
NUMBER_RE = r'\d+(?:\.\d+)?'
def decode(self, s):
    """
    Decode the JSON document `s`

    `self.transform_source`, if set, is applied to `s` first.
    If `self.ignore_extra` is set, leading whitespace is stripped and
    anything following the first complete JSON value is ignored.

    @param s    The JSON document as a string
    @returns    The decoded Python object
    @raises     json.JSONDecodeError with a short excerpt of the document
                around the failing position appended to the message
    """
    if self.transform_source:
        s = self.transform_source(s)
    try:
        if self.ignore_extra:
            # Strip once so that `e.pos` refers to the same string that is
            # sliced and reported below
            s = s.lstrip()
            return self.raw_decode(s)[0]
        return super().decode(s)
    except json.JSONDecodeError as e:
        if e.pos is not None:
            # Clamp the start: a negative index would wrap around to the
            # end of the document and produce an empty or unrelated excerpt
            start = max(e.pos - 10, 0)
            raise type(e)(f'{e.msg} in {s[start:e.pos + 10]!r}', s, e.pos)
        raise
def sanitize_open(filename, open_mode):
@classmethod
def run(cls, *args, timeout=None, **kwargs):
    """
    Create an instance of `cls` from the given arguments, wait for it with
    `communicate_or_kill` and return `(stdout, stderr, returncode)`

    Falsy stdout/stderr are replaced by an empty `str` when the instance
    reports `text_mode`, otherwise by empty `bytes`.
    """
    with cls(*args, **kwargs) as process:
        empty = '' if process.text_mode else b''
        out, err = process.communicate_or_kill(timeout=timeout)
        if not out:
            out = empty
        if not err:
            err = empty
        return out, err, process.returncode
def get_subprocess_encoding():
raise
def is_path_like(f):
    """Return whether `f` is a `str`, `bytes` or `os.PathLike` object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
+
+
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
def __init__(self, filename=None, *args, **kwargs):
    """
    Initialise the parent jar without a filename and then store `filename`,
    normalised through `os.fspath` when it is path-like
    """
    super().__init__(None, *args, **kwargs)
    # Anything that is not path-like is stored unchanged
    self.filename = os.fspath(filename) if is_path_like(filename) else filename
def _true_or_false(cndn):
return 'TRUE' if cndn else 'FALSE'
- @staticmethod
- def is_path(file):
- return isinstance(file, (str, bytes, os.PathLike))
-
@contextlib.contextmanager
def open(self, file, *, write=False):
- if self.is_path(file):
+ if is_path_like(file):
with open(file, 'w' if write else 'r', encoding='utf-8') as f:
yield f
else:
if f'{line.strip()} '[0] in '[{"':
raise http.cookiejar.LoadError(
'Cookies file must be Netscape formatted, not JSON. See '
- 'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
+ 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
continue
cf.seek(0)
python_implementation = platform.python_implementation()
if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ libc_ver = []
+ with contextlib.suppress(OSError): # We may not have access to the executable
+ libc_ver = platform.libc_ver()
return 'Python %s (%s %s) - %s %s' % (
platform.python_version(),
python_implementation,
platform.architecture()[0],
platform.platform(),
- format_field(join_nonempty(*platform.libc_ver(), delim=' '), None, '(%s)'),
+ format_field(join_nonempty(*libc_ver, delim=' '), None, '(%s)'),
)
def base_url(url):
    """
    Return `url` truncated after the last `/` that precedes any `?` or `#`

    Raises AttributeError when `url` does not start with an http(s) scheme
    followed by such a path (the match is `None`).
    """
    matched = re.match(r'https?://[^?#]+/', url)
    return matched.group()
def urljoin(base, path):
datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
+ date_format = re.sub( # Support %s on windows
+ r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
return datetime_object.strftime(date_format)
except (ValueError, TypeError, AttributeError):
return default
return out, content_type
def variadic(x, allowed_types=(str, bytes, dict)):
    """
    Return `x` unchanged if it is an iterable that is not an instance of
    `allowed_types`; otherwise wrap it in a one-element tuple
    """
    if not isinstance(x, collections.abc.Iterable):
        return (x,)
    if isinstance(x, allowed_types):
        return (x,)
    return x
+
+
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
for val in map(d.get, variadic(key_or_keys)):
if val is not None and (val or not skip_false_values):
for f in funcs:
try:
val = f(*args, **kwargs)
- except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
+ except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
pass
else:
if expected_type is None or isinstance(val, expected_type):
return '"%d":' % i if v.endswith(':') else '%d' % i
if v in vars:
- return vars[v]
+ return json.dumps(vars[v])
if strict:
raise ValueError(f'Unknown value: {v}')
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
if not strict:
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+ code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
COMPATIBLE_CODECS = {
'mp4': {
'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
- 'h264', 'aacl', # Set in ISM
+ 'h264', 'aacl', 'ec-3', # Set in ISM
},
'webm': {
'av1', 'vp9', 'vp8', 'opus', 'vrbs',
ext = determine_ext(url)
if ext == 'm3u8':
- return 'm3u8'
+ return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
elif ext == 'f4m':
return 'f4m'
self.chapters, self.ranges = chapters, ranges
def __call__(self, info_dict, ydl):
+ if not self.ranges and not self.chapters:
+ yield {}
+
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Cannot match chapters since chapter information is unavailable')
for regex in self.chapters or []:
def traverse_obj(
        obj, *paths, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    """
    Safely traverse nested `Mapping`s and `Sequence`s

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    'value'

    Each of the provided `paths` is tested and the first producing a valid result will be returned.
    A value of None is treated as the absence of a value.

    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.

    The keys in the path can be one of:
        - `None`:           Return the current object.
        - `str`/`int`:      Return `obj[key]`.
        - `slice`:          Return the slice `obj[key]` as a single value (does not branch).
        - `Ellipsis`:       Branch out and return a list of all values.
        - `tuple`/`list`:   Branch out and return a list of all matching values.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch out and return values filtered by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Sequence`s, `key` is the index of the value.
        - `dict`:           Transform the current object and return a matching dict.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

        `tuple`, `list`, and `dict` all support nested paths and branches

    @param paths            Paths which to traverse by.
    @param default          Value to return if the paths do not match.
    @param expected_type    If a `type`, only accept final values of this type.
                            If any other callable, try to call the function on each result.
    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
    @param casesense        If `False`, consider string dictionary keys as case insensitive.

    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API

    @param is_user_input    Whether the keys are generated from user input.
                            If `True` strings get converted to `int`/`slice` if needed.
    @param traverse_string  Whether to traverse into objects as strings.
                            If `True`, any non-compatible object will first be
                            converted into a string and then traversed into.


    @returns                The result of the object traversal.
                            If successful, `get_all=True`, and the path branches at least once,
                            then a list of results is returned instead.
    """
    is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
    casefold = lambda k: k.casefold() if isinstance(k, str) else k

    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    else:
        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))

    def apply_key(key, obj):
        # Generator yielding every value that `key` selects from `obj`
        if obj is None:
            return

        elif key is None:
            yield obj

        elif isinstance(key, (list, tuple)):
            for branch in key:
                _, result = apply_path(obj, branch)
                yield from result

        elif key is ...:
            if isinstance(obj, collections.abc.Mapping):
                yield from obj.values()
            elif is_sequence(obj):
                yield from obj
            elif traverse_string:
                yield from str(obj)

        elif callable(key):
            if is_sequence(obj):
                iter_obj = enumerate(obj)
            elif isinstance(obj, collections.abc.Mapping):
                iter_obj = obj.items()
            elif traverse_string:
                iter_obj = enumerate(str(obj))
            else:
                return
            yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))

        elif isinstance(key, dict):
            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
            yield {k: v if v is not None else default for k, v in iter_obj
                   if v is not None or default is not None}

        # Accept any Mapping, consistent with the `...` and callable branches above
        elif isinstance(obj, collections.abc.Mapping):
            yield (obj.get(key) if casesense or (key in obj)
                   else next((v for k, v in obj.items() if casefold(k) == key), None))

        else:
            if is_user_input:
                key = (int_or_none(key) if ':' not in key
                       else slice(*map(int_or_none, key.split(':'))))

            if not isinstance(key, (int, slice)):
                return

            if not is_sequence(obj):
                if not traverse_string:
                    return
                obj = str(obj)

            with contextlib.suppress(IndexError):
                yield obj[key]

    def apply_path(start_obj, path):
        # Lazily chain `apply_key` over every key of `path`;
        # also report whether any key may yield more than one value
        objs = (start_obj,)
        has_branched = False

        for key in variadic(path):
            if is_user_input and key == ':':
                key = ...

            if not casesense and isinstance(key, str):
                key = key.casefold()

            if key is ... or isinstance(key, (list, tuple)) or callable(key):
                has_branched = True

            key_func = functools.partial(apply_key, key)
            objs = itertools.chain.from_iterable(map(key_func, objs))

        return has_branched, objs

    def _traverse_obj(obj, path):
        has_branched, results = apply_path(obj, path)
        results = LazyList(x for x in map(type_test, results) if x is not None)
        if results:
            return results.exhaust() if get_all and has_branched else results[0]

    for path in paths:
        result = _traverse_obj(obj, path)
        if result is not None:
            return result

    return default
return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
-def variadic(x, allowed_types=(str, bytes, dict)):
- return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
-
-
def time_seconds(**kwargs):
    """
    Return the current time in seconds since the epoch, shifted by an offset

    @param kwargs   Keyword arguments accepted by `datetime.timedelta`
                    describing the offset (e.g. `hours=9`)
    @returns        The current Unix timestamp plus the offset, as a float
    """
    # `datetime.timestamp()` of an aware datetime is independent of its
    # timezone, so the offset has to be added to the timestamp explicitly
    offset = datetime.timedelta(**kwargs)
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.timestamp() + offset.total_seconds()
self.parsed_args = self.own_args
for location in opts.config_locations or []:
if location == '-':
+ if location in self._loaded_paths:
+ continue
+ self._loaded_paths.add(location)
self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
continue
location = os.path.join(directory, expand_path(location))