43 import xml
.etree
.ElementTree
45 from . import traversal
47 from ..compat
import functools
# isort: split
48 from ..compat
import (
49 compat_etree_fromstring
,
51 compat_HTMLParseError
,
55 from ..dependencies
import xattr
57 __name__
= __name__
.rsplit('.', 1)[0] # Pretend to be the parent module
59 # This is not clearly defined otherwise
60 compiled_regex_type
= type(re
.compile(''))
71 ENGLISH_MONTH_NAMES
= [
72 'January', 'February', 'March', 'April', 'May', 'June',
73 'July', 'August', 'September', 'October', 'November', 'December']
76 'en': ENGLISH_MONTH_NAMES
,
78 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
79 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
80 # these follow the genitive grammatical case (dopełniacz)
81 # some websites might be using nominative, which will require another month list
82 # https://en.wikibooks.org/wiki/Polish/Noun_cases
83 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
84 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
87 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
89 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
90 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
91 'EST': -5, 'EDT': -4, # Eastern
92 'CST': -6, 'CDT': -5, # Central
93 'MST': -7, 'MDT': -6, # Mountain
94 'PST': -8, 'PDT': -7 # Pacific
# needed for sanitizing filenames in restricted mode
# Maps each accented/special character to its ASCII replacement; zip pairs the
# key string with the flattened chain of replacements (multi-char entries such
# as 'AE' for Æ and 'ss' for ß are passed as single-element lists).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
130 '%Y-%m-%d %H:%M:%S.%f',
131 '%Y-%m-%d %H:%M:%S:%f',
134 '%Y-%m-%dT%H:%M:%SZ',
135 '%Y-%m-%dT%H:%M:%S.%fZ',
136 '%Y-%m-%dT%H:%M:%S.%f0Z',
138 '%Y-%m-%dT%H:%M:%S.%f',
141 '%b %d %Y at %H:%M:%S',
143 '%B %d %Y at %H:%M:%S',
147 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
148 DATE_FORMATS_DAY_FIRST
.extend([
159 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
160 DATE_FORMATS_MONTH_FIRST
.extend([
# Matches the argument list of "packed" JS payloads of the shape
# }('…', N, N, '…'.split('|')  — capturing code, radix, count and symbol table
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> block into the
# named group 'json_ld'; accepts either a JSON object {...} or array [...]
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
# A decimal number with an optional fractional part
NUMBER_RE = r'\d+(?:\.\d+)?'
175 def preferredencoding():
176 """Get preferred encoding.
178 Returns the best encoding scheme for the system, based on
179 locale.getpreferredencoding() and some further tweaks.
182 pref = locale.getpreferredencoding()
190 def write_json_file(obj, fn):
191 """ Encode obj as JSON and write it to fn, atomically if possible """
193 tf = tempfile.NamedTemporaryFile(
194 prefix=f'{os.path.basename(fn)}
.', dir=os.path.dirname(fn),
195 suffix='.tmp
', delete=False, mode='w
', encoding='utf
-8')
199 json.dump(obj, tf, ensure_ascii=False)
200 if sys.platform == 'win32
':
201 # Need to remove existing file on Windows, else os.rename raises
202 # WindowsError or FileExistsError.
203 with contextlib.suppress(OSError):
205 with contextlib.suppress(OSError):
208 os.chmod(tf.name, 0o666 & ~mask)
209 os.rename(tf.name, fn)
211 with contextlib.suppress(OSError):
def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    # Restrict the attribute name to a safe character set before building the expression
    assert re.match(r'^[a-zA-Z_-]+$', key)
    if val is None:
        # Match any element that merely has the attribute
        expr = f'{xpath}[@{key}]'
    else:
        # Match elements whose attribute has exactly this value
        expr = f"{xpath}[@{key}='{val}']"
    return node.find(expr)
222 # On python2.6 the xml.etree.ElementTree.Element methods don't support
223 # the namespace parameter
226 def xpath_with_ns(path
, ns_map
):
227 components
= [c
.split(':') for c
in path
.split('/')]
231 replaced
.append(c
[0])
234 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
235 return '/'.join(replaced
)
238 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
239 def _find_xpath(xpath
):
240 return node
.find(xpath
)
242 if isinstance(xpath
, str):
243 n
= _find_xpath(xpath
)
251 if default
is not NO_DEFAULT
:
254 name
= xpath
if name
is None else name
255 raise ExtractorError('Could not find XML element %s' % name
)
261 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
262 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
263 if n
is None or n
== default
:
266 if default
is not NO_DEFAULT
:
269 name
= xpath
if name
is None else name
270 raise ExtractorError('Could not find XML element\'s text %s' % name
)
276 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
277 n
= find_xpath_attr(node
, xpath
, key
)
279 if default
is not NO_DEFAULT
:
282 name
= f
'{xpath}[@{key}]' if name
is None else name
283 raise ExtractorError('Could not find XML attribute %s' % name
)
def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is simply an attribute lookup on the 'id' attribute
    return get_element_by_attribute('id', id, html, **kwargs)
def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    # Delegates to the generic attribute-based lookup with attribute name 'id'
    return get_element_html_by_attribute('id', id, html, **kwargs)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    # First match or None when nothing matched
    return next(iter(matches), None)
def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_html_by_class(class_name, html)
    # First match or None when nothing matched
    return next(iter(matches), None)
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag matching attribute=value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    return next(iter(matches), None)
def get_element_html_by_attribute(attribute, value, html, **kwargs):
    """Return the html of the first tag matching attribute=value, or None.

    Consistency fix: the catch-all parameter is renamed from ``**kargs`` to
    ``**kwargs`` to match every sibling wrapper in this group; the rename is
    backward compatible since a ``**``-catch-all's name is invisible to callers.
    """
    retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # NOTE(review): **kargs is accepted but never forwarded — confirm this is intentional
    # Build a regex that matches the class name as a whole word inside a class attribute value
    class_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_re, html, escape_value=False)
def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # Build a regex that matches the class name as a whole word inside a class attribute value
    class_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_html_by_attribute('class', class_re, html, escape_value=False)
def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    # Keep only the text (content) half of each (content, whole) pair
    pairs = get_elements_text_and_html_by_attribute(*args, **kwargs)
    return [content for content, _whole in pairs]
def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    # Keep only the html (whole) half of each (content, whole) pair
    pairs = get_elements_text_and_html_by_attribute(*args, **kwargs)
    return [whole for _content, whole in pairs]
345 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w
:.-]+', escape_value=True):
347 Return the text (content) and the html (whole) of the tag with the specified
348 attribute in the passed HTML document
353 quote = '' if re.match(r'''[\s"'`
=<>]''', value) else '?'
355 value = re.escape(value) if escape_value else value
357 partial_element_re = rf'''(?x
)
359 (?
:\
s(?
:[^
>"']|"[^
"]*"|
'[^']*')*)?
360 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
363 for m in re.finditer(partial_element_re, html):
364 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
367 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P
<content
>.*)(?P
=q
)$
', r'\g
<content
>', content, flags=re.DOTALL)),
372 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
374 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
375 closing tag for the first opening tag it has encountered, and can be used
379 class HTMLBreakOnClosingTagException(Exception):
383 self.tagstack = collections.deque()
384 html.parser.HTMLParser.__init__(self)
389 def __exit__(self, *_):
393 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
394 # so data remains buffered; we no longer have any interest in it, thus
395 # override this method to discard it
398 def handle_starttag(self, tag, _):
399 self.tagstack.append(tag)
401 def handle_endtag(self, tag):
402 if not self.tagstack:
403 raise compat_HTMLParseError('no tags
in the stack
')
405 inner_tag = self.tagstack.pop()
409 raise compat_HTMLParseError(f'matching opening tag
for closing {tag} tag
not found
')
410 if not self.tagstack:
411 raise self.HTMLBreakOnClosingTagException()
414 # XXX: This should be far less strict
415 def get_element_text_and_html_by_tag(tag, html):
417 For the first element with the specified tag in the passed HTML document
418 return its' content (text
) and the whole
element (html
)
420 def find_or_raise(haystack, needle, exc):
422 return haystack.index(needle)
425 closing_tag = f'</{tag}>'
426 whole_start = find_or_raise(
427 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
428 content_start = find_or_raise(
429 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
430 content_start += whole_start + 1
431 with HTMLBreakOnClosingTagParser() as parser:
432 parser.feed(html[whole_start:content_start])
433 if not parser.tagstack or parser.tagstack[0] != tag:
434 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
435 offset = content_start
436 while offset < len(html):
437 next_closing_tag_start = find_or_raise(
438 html[offset:], closing_tag,
439 compat_HTMLParseError(f'closing {tag} tag not found'))
440 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
442 parser.feed(html[offset:offset + next_closing_tag_end])
443 offset += next_closing_tag_end
444 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
445 return html[content_start:offset + next_closing_tag_start], \
446 html[whole_start:offset + next_closing_tag_end]
447 raise compat_HTMLParseError('unexpected end of html')
450 class HTMLAttributeParser(html.parser.HTMLParser):
451 """Trivial HTML parser to gather the attributes
for a single element
"""
455 html.parser.HTMLParser.__init__(self)
457 def handle_starttag(self, tag, attrs):
458 self.attrs = dict(attrs)
459 raise compat_HTMLParseError('done')
462 class HTMLListAttrsParser(html.parser.HTMLParser):
463 """HTML parser to gather the attributes
for the elements of a
list"""
466 html.parser.HTMLParser.__init__(self)
470 def handle_starttag(self, tag, attrs):
471 if tag == 'li' and self._level == 0:
472 self.items.append(dict(attrs))
475 def handle_endtag(self, tag):
479 def extract_attributes(html_element):
480 """Given a string
for an HTML element such
as
482 a
="foo" B
="bar" c
="&98;az" d
=boz
483 empty
= noval entity
="&"
486 Decode
and return a dictionary of attributes
.
488 'a': 'foo', 'b': 'bar', c
: 'baz', d
: 'boz',
489 'empty': '', 'noval': None, 'entity': '&',
490 'sq': '"', 'dq': '\''
493 parser = HTMLAttributeParser()
494 with contextlib.suppress(compat_HTMLParseError):
495 parser.feed(html_element)
500 def parse_list(webpage):
501 """Given a string
for an series of HTML
<li
> elements
,
502 return a dictionary of their attributes
"""
503 parser = HTMLListAttrsParser()
509 def clean_html(html):
510 """Clean an HTML snippet into a readable string
"""
512 if html is None: # Convenience for sanitizing descriptions etc.
515 html = re.sub(r'\s+', ' ', html)
516 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
517 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
519 html = re.sub('<.*?>', '', html)
520 # Replace html entities
521 html = unescapeHTML(html)
525 class LenientJSONDecoder(json.JSONDecoder):
527 def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
528 self.transform_source, self.ignore_extra = transform_source, ignore_extra
529 self._close_attempts = 2 * close_objects
530 super().__init__(*args, **kwargs)
533 def _close_object(err):
534 doc = err.doc[:err.pos]
535 # We need to add comma first to get the correct error message
536 if err.msg.startswith('Expecting \',\''):
538 elif not doc.endswith(','):
541 if err.msg.startswith('Expecting property name'):
542 return doc[:-1] + '}'
543 elif err.msg.startswith('Expecting value'):
544 return doc[:-1] + ']'
547 if self.transform_source:
548 s = self.transform_source(s)
549 for attempt in range(self._close_attempts + 1):
551 if self.ignore_extra:
552 return self.raw_decode(s.lstrip())[0]
553 return super().decode(s)
554 except json.JSONDecodeError as e:
557 elif attempt < self._close_attempts:
558 s = self._close_object(e)
561 raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
562 assert False, 'Too many attempts to decode JSON'
565 def sanitize_open(filename, open_mode):
566 """Try to
open the given filename
, and slightly tweak it
if this fails
.
568 Attempts to
open the given filename
. If this fails
, it tries to change
569 the filename slightly
, step by step
, until it
's either able to open it
570 or it fails and raises a final exception, like the standard open()
573 It returns the tuple (stream, definitive_file_name).
576 if sys.platform == 'win32
':
579 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
580 with contextlib.suppress(io.UnsupportedOperation):
581 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
582 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
584 for attempt in range(2):
587 if sys.platform == 'win32
':
588 # FIXME: An exclusive lock also locks the file from being read.
589 # Since windows locks are mandatory, don't lock the
file on
windows (for now
).
590 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
591 raise LockingUnsupportedError()
592 stream
= locked_file(filename
, open_mode
, block
=False).__enter
__()
594 stream
= open(filename
, open_mode
)
595 return stream
, filename
596 except OSError as err
:
597 if attempt
or err
.errno
in (errno
.EACCES
,):
599 old_filename
, filename
= filename
, sanitize_path(filename
)
600 if old_filename
== filename
:
604 def timeconvert(timestr
):
605 """Convert RFC 2822 defined time string into system timestamp"""
607 timetuple
= email
.utils
.parsedate_tz(timestr
)
608 if timetuple
is not None:
609 timestamp
= email
.utils
.mktime_tz(timetuple
)
613 def sanitize_filename(s
, restricted
=False, is_id
=NO_DEFAULT
):
614 """Sanitizes a string so it could be used as part of a filename.
615 @param restricted Use a stricter subset of allowed characters
616 @param is_id Whether this is an ID that should be kept unchanged if possible.
617 If unset, yt-dlp's new sanitization rules are in effect
622 def replace_insane(char
):
623 if restricted
and char
in ACCENT_CHARS
:
624 return ACCENT_CHARS
[char
]
625 elif not restricted
and char
== '\n':
627 elif is_id
is NO_DEFAULT
and not restricted
and char
in '"*:<>?|/\\':
628 # Replace with their full-width unicode counterparts
629 return {'/': '\u29F8', '\\': '\u29f9'}
.get(char
, chr(ord(char
) + 0xfee0))
630 elif char
== '?' or ord(char
) < 32 or ord(char
) == 127:
633 return '' if restricted
else '\''
635 return '\0_\0-' if restricted
else '\0 \0-'
636 elif char
in '\\/|*<>':
638 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace() or ord(char
) > 127):
639 return '' if unicodedata
.category(char
)[0] in 'CM' else '\0_'
642 # Replace look-alike Unicode glyphs
643 if restricted
and (is_id
is NO_DEFAULT
or not is_id
):
644 s
= unicodedata
.normalize('NFKC', s
)
645 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) # Handle timestamps
646 result
= ''.join(map(replace_insane
, s
))
647 if is_id
is NO_DEFAULT
:
648 result
= re
.sub(r
'(\0.)(?:(?=\1)..)+', r
'\1', result
) # Remove repeated substitute chars
649 STRIP_RE
= r
'(?:\0.|[ _-])*'
650 result
= re
.sub(f
'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result
) # Remove substitute chars from start/end
651 result
= result
.replace('\0', '') or '_'
654 while '__' in result
:
655 result
= result
.replace('__', '_')
656 result
= result
.strip('_')
657 # Common case of "Foreign band name - English song title"
658 if restricted
and result
.startswith('-_'):
660 if result
.startswith('-'):
661 result
= '_' + result
[len('-'):]
662 result
= result
.lstrip('.')
668 def sanitize_path(s
, force
=False):
669 """Sanitizes and normalizes path on Windows"""
670 # XXX: this handles drive relative paths (c:sth) incorrectly
671 if sys
.platform
== 'win32':
673 drive_or_unc
, _
= os
.path
.splitdrive(s
)
679 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
683 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
684 for path_part
in norm_path
]
686 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
687 elif force
and s
and s
[0] == os
.path
.sep
:
688 sanitized_path
.insert(0, os
.path
.sep
)
689 # TODO: Fix behavioral differences <3.12
690 # The workaround using `normpath` only superficially passes tests
691 # Ref: https://github.com/python/cpython/pull/100351
692 return os
.path
.normpath(os
.path
.join(*sanitized_path
))
695 def sanitize_url(url
, *, scheme
='http'):
696 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
697 # the number of unwanted failures due to missing protocol
700 elif url
.startswith('//'):
701 return f
'{scheme}:{url}'
702 # Fix some common typos seen so far
704 # https://github.com/ytdl-org/youtube-dl/issues/15649
705 (r
'^httpss://', r
'https://'),
706 # https://bx1.be/lives/direct-tv/
707 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
709 for mistake
, fixup
in COMMON_TYPOS
:
710 if re
.match(mistake
, url
):
711 return re
.sub(mistake
, fixup
, url
)
715 def extract_basic_auth(url
):
716 parts
= urllib
.parse
.urlsplit(url
)
717 if parts
.username
is None:
719 url
= urllib
.parse
.urlunsplit(parts
._replace
(netloc
=(
720 parts
.hostname
if parts
.port
is None
721 else '%s:%d' % (parts
.hostname
, parts
.port
))))
722 auth_payload
= base64
.b64encode(
723 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode())
724 return url
, f
'Basic {auth_payload.decode()}'
728 """Expand shell variables and ~"""
729 return os
.path
.expandvars(compat_expanduser(s
))
732 def orderedSet(iterable
, *, lazy
=False):
733 """Remove all duplicates from the input iterable"""
735 seen
= [] # Do not use set since the items can be unhashable
741 return _iter() if lazy
else list(_iter())
744 def _htmlentity_transform(entity_with_semicolon
):
745 """Transforms an HTML entity to a character."""
746 entity
= entity_with_semicolon
[:-1]
748 # Known non-numeric HTML entity
749 if entity
in html
.entities
.name2codepoint
:
750 return chr(html
.entities
.name2codepoint
[entity
])
752 # TODO: HTML5 allows entities without a semicolon.
753 # E.g. 'Éric' should be decoded as 'Éric'.
754 if entity_with_semicolon
in html
.entities
.html5
:
755 return html
.entities
.html5
[entity_with_semicolon
]
757 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
759 numstr
= mobj
.group(1)
760 if numstr
.startswith('x'):
762 numstr
= '0%s' % numstr
765 # See https://github.com/ytdl-org/youtube-dl/issues/7518
766 with contextlib
.suppress(ValueError):
767 return chr(int(numstr
, base
))
769 # Unknown entity in name, return its literal representation
770 return '&%s;' % entity
776 assert isinstance(s
, str)
779 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
782 def escapeHTML(text
):
785 .replace('&', '&')
786 .replace('<', '<')
787 .replace('>', '>')
788 .replace('"', '"')
789 .replace("'", ''')
class netrc_from_content(netrc.netrc):
    """A ``netrc.netrc`` that is parsed from an in-memory string instead of a file."""

    def __init__(self, content):
        # Skip netrc.netrc.__init__ (which reads a file); initialize the maps ourselves
        self.hosts = {}
        self.macros = {}
        with io.StringIO(content) as stream:
            # '-' is a placeholder filename; False disables the file-permission check
            self._parse('-', stream, False)
800 class Popen(subprocess
.Popen
):
801 if sys
.platform
== 'win32':
802 _startupinfo
= subprocess
.STARTUPINFO()
803 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
808 def _fix_pyinstaller_ld_path(env
):
809 """Restore LD_LIBRARY_PATH when using PyInstaller
810 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
811 https://github.com/yt-dlp/yt-dlp/issues/4573
813 if not hasattr(sys
, '_MEIPASS'):
817 orig
= env
.get(f
'{key}_ORIG')
823 _fix('LD_LIBRARY_PATH') # Linux
824 _fix('DYLD_LIBRARY_PATH') # macOS
826 def __init__(self
, args
, *remaining
, env
=None, text
=False, shell
=False, **kwargs
):
828 env
= os
.environ
.copy()
829 self
._fix
_pyinstaller
_ld
_path
(env
)
831 self
.__text
_mode
= kwargs
.get('encoding') or kwargs
.get('errors') or text
or kwargs
.get('universal_newlines')
833 kwargs
['universal_newlines'] = True # For 3.6 compatibility
834 kwargs
.setdefault('encoding', 'utf-8')
835 kwargs
.setdefault('errors', 'replace')
837 if shell
and compat_os_name
== 'nt' and kwargs
.get('executable') is None:
838 if not isinstance(args
, str):
839 args
= ' '.join(compat_shlex_quote(a
) for a
in args
)
841 args
= f
'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
843 super().__init
__(args
, *remaining
, env
=env
, shell
=shell
, **kwargs
, startupinfo
=self
._startupinfo
)
846 comspec
= os
.environ
.get('ComSpec') or os
.path
.join(
847 os
.environ
.get('SystemRoot', ''), 'System32', 'cmd.exe')
848 if os
.path
.isabs(comspec
):
850 raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
852 def communicate_or_kill(self
, *args
, **kwargs
):
854 return self
.communicate(*args
, **kwargs
)
855 except BaseException
: # Including KeyboardInterrupt
856 self
.kill(timeout
=None)
859 def kill(self
, *, timeout
=0):
862 self
.wait(timeout
=timeout
)
865 def run(cls
, *args
, timeout
=None, **kwargs
):
866 with cls(*args
, **kwargs
) as proc
:
867 default
= '' if proc
.__text
_mode
else b
''
868 stdout
, stderr
= proc
.communicate_or_kill(timeout
=timeout
)
869 return stdout
or default
, stderr
or default
, proc
.returncode
def encodeArgument(s):
    """Return *s* as ``str``; byte strings are decoded as ASCII (legacy callers)."""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')
# Clock-component tuple produced by timetuple_from_msec() below
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
def timetuple_from_msec(msec):
    """Split a millisecond count into an (hours, minutes, seconds, milliseconds) Time tuple."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
889 def formatSeconds(secs
, delim
=':', msec
=False):
890 time
= timetuple_from_msec(secs
* 1000)
892 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
894 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
896 ret
= '%d' % time
.seconds
897 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
def bug_reports_message(before=';'):
    """Return the standard bug-report blurb appended to *before*, capitalised when it starts a sentence."""
    from ..update import REPOSITORY
    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
    before = before.rstrip()
    # Capitalise when there is no prefix or the prefix ends a sentence
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]
    if before:
        return f'{before} {msg}'
    return msg
913 class YoutubeDLError(Exception):
914 """Base exception for YoutubeDL errors."""
917 def __init__(self
, msg
=None):
920 elif self
.msg
is None:
921 self
.msg
= type(self
).__name
__
922 super().__init
__(self
.msg
)
925 class ExtractorError(YoutubeDLError
):
926 """Error during info extraction."""
928 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
929 """ tb, if given, is the original traceback (so that it can be printed out).
930 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
932 from ..networking
.exceptions
import network_exceptions
933 if sys
.exc_info()[0] in network_exceptions
:
936 self
.orig_msg
= str(msg
)
938 self
.expected
= expected
940 self
.video_id
= video_id
942 self
.exc_info
= sys
.exc_info() # preserve original exception
943 if isinstance(self
.exc_info
[1], ExtractorError
):
944 self
.exc_info
= self
.exc_info
[1].exc_info
945 super().__init
__(self
.__msg
)
950 format_field(self
.ie
, None, '[%s] '),
951 format_field(self
.video_id
, None, '%s: '),
953 format_field(self
.cause
, None, ' (caused by %r)'),
954 '' if self
.expected
else bug_reports_message()))
956 def format_traceback(self
):
957 return join_nonempty(
958 self
.traceback
and ''.join(traceback
.format_tb(self
.traceback
)),
959 self
.cause
and ''.join(traceback
.format_exception(None, self
.cause
, self
.cause
.__traceback
__)[1:]),
962 def __setattr__(self
, name
, value
):
963 super().__setattr
__(name
, value
)
964 if getattr(self
, 'msg', None) and name
not in ('msg', 'args'):
965 self
.msg
= self
.__msg
or type(self
).__name
__
966 self
.args
= (self
.msg
, ) # Cannot be property
969 class UnsupportedError(ExtractorError
):
970 def __init__(self
, url
):
972 'Unsupported URL: %s' % url
, expected
=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Geo-restriction is an expected condition, not a bug in the extractor
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        # presumably a collection of country codes where the video is available — confirm with callers
        self.countries = countries
class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        # Not-live is an expected condition, not an extractor bug
        kwargs['expected'] = True
        default_msg = 'The channel is not currently live'
        super().__init__(msg or default_msg, **kwargs)
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        # Keep the originating exception info for later diagnostics
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Default message inherited by the base class's __init__
    msg = 'Entry not found in info'
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Bug fix: the f-string previously contained no placeholder, so the
            # offending filename was never included in the message
            self.msg += f': {filename}'
        super().__init__(self.msg)
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Default message; subclasses below override msg with their specific reason
    msg = 'The download was cancelled'
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """ --break-match-filter triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        # Whether this re-extraction condition is anticipated (not a bug)
        self.expected = expected
        super().__init__(msg)
1075 class ThrottledDownload(ReExtractInfo
):
1076 """ Download speed below --throttled-rate. """
1077 msg
= 'The download speed is below throttle limit'
1080 super().__init
__(self
.msg
, expected
=False)
1083 class UnavailableVideoError(YoutubeDLError
):
1084 """Unavailable Format exception.
1086 This exception will be thrown when a video is requested
1087 in a format that is not available for that video.
1089 msg
= 'Unable to download video'
1091 def __init__(self
, err
=None):
1093 self
.msg
+= f
': {err}'
1094 super().__init
__(self
.msg
)
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Keep both byte counts for inspection by callers
        self.downloaded = downloaded
        self.expected = expected
1112 class XAttrMetadataError(YoutubeDLError
):
1113 def __init__(self
, code
=None, msg
='Unknown error'):
1114 super().__init
__(msg
)
1118 # Parsing code and msg
1119 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
1120 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
1121 self
.reason
= 'NO_SPACE'
1122 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
1123 self
.reason
= 'VALUE_TOO_LONG'
1125 self
.reason
= 'NOT_SUPPORTED'
1128 class XAttrUnavailableError(YoutubeDLError
):
def is_path_like(f):
    """Return True if *f* can be used as a filesystem path (str, bytes or os.PathLike)."""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1136 def extract_timezone(date_str
):
1139 ^.{8,}? # >=8 char non-TZ prefix, if present
1140 (?P<tz>Z| # just the UTC Z, or
1141 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1142 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1143 [ ]? # optional space
1144 (?P<sign>\+|-) # +/-
1145 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1149 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1150 timezone
= TIMEZONE_NAMES
.get(m
and m
.group('tz').strip())
1151 if timezone
is not None:
1152 date_str
= date_str
[:-len(m
.group('tz'))]
1153 timezone
= datetime
.timedelta(hours
=timezone
or 0)
1155 date_str
= date_str
[:-len(m
.group('tz'))]
1156 if not m
.group('sign'):
1157 timezone
= datetime
.timedelta()
1159 sign
= 1 if m
.group('sign') == '+' else -1
1160 timezone
= datetime
.timedelta(
1161 hours
=sign
* int(m
.group('hours')),
1162 minutes
=sign
* int(m
.group('minutes')))
1163 return timezone
, date_str
1166 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
1167 """ Return a UNIX timestamp from the given date """
1169 if date_str
is None:
1172 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
1174 if timezone
is None:
1175 timezone
, date_str
= extract_timezone(date_str
)
1177 with contextlib
.suppress(ValueError):
1178 date_format
= f
'%Y-%m-%d{delimiter}%H:%M:%S'
1179 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
1180 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Return the strptime format list ordered for day-first or month-first locales."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    # Offset itself is discarded: only the calendar date matters here
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        # Fall back to RFC 2822 parsing (e.g. "Thu, 01 Jan 2020 00:00:00")
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp (or None)."""
    if not isinstance(date_str, str):
        return None

    # Strip commas, pipes and (abbreviated) weekday names, then squeeze spaces
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

    # Note the PM flag before the AM/PM marker is removed below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())

    # Last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*; fall back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Text after the final '.' of the path part (query string dropped)
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by swapping the extension for '<lang>.<format>'."""
    new_ext = f'{sub_lang}.{sub_format}'
    return replace_extension(filename, new_ext, expected_real_ext)
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.

    Format accepted:
    (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is not None:
        # Recurse on the base date, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # timedelta has no month/year arithmetic; use calendar-aware helper
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
        raise ValueError(f'Invalid date format "{date_str}"')
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
def datetime_add_months(dt, months):
    """Shift *dt* forward/backward by a number of calendar months.

    The day of month is clamped to the target month's length, so e.g.
    Jan 31 + 1 month yields the last day of February.
    """
    years_delta, month_index = divmod(dt.month - 1 + months, 12)
    new_year = dt.year + years_delta
    new_month = month_index + 1
    new_day = min(dt.day, calendar.monthrange(new_year, new_month)[1])
    return dt.replace(new_year, new_month, new_day)
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    # Seconds per rounding unit
    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    # Round-half-up on the UNIX timestamp
    roundto = lambda x, n: ((x + n / 2) // n) * n
    timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        # Not a compact date: return the input untouched
        return date_str
    return '-'.join(m.groups())
1353 """Represents a time interval between two dates"""
1355 def __init__(self
, start
=None, end
=None):
1356 """start and end must be strings in the format accepted by date"""
1357 if start
is not None:
1358 self
.start
= date_from_str(start
, strict
=True)
1360 self
.start
= datetime
.datetime
.min.date()
1362 self
.end
= date_from_str(end
, strict
=True)
1364 self
.end
= datetime
.datetime
.max.date()
1365 if self
.start
> self
.end
:
1366 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
1370 """Returns a range that only contains the given day"""
1371 return cls(day
, day
)
1373 def __contains__(self
, date
):
1374 """Check if the date is in the range"""
1375 if not isinstance(date
, datetime
.date
):
1376 date
= date_from_str(date
)
1377 return self
.start
<= date
<= self
.end
1380 return f
'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'
1382 def __eq__(self
, other
):
1383 return (isinstance(other
, DateRange
)
1384 and self
.start
== other
.start
and self
.end
== other
.end
)
def system_identifier():
    """Return a one-line description of the running Python/OS environment."""
    python_implementation = platform.python_implementation()
    if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    libc_ver = []
    with contextlib.suppress(OSError):  # We may not have access to the executable
        libc_ver = platform.libc_ver()

    return 'Python %s (%s %s %s) - %s (%s%s)' % (
        platform.python_version(),
        python_implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
def get_windows_version():
    ''' Get Windows version. returns () if it's not running on Windows '''
    if compat_os_name == 'nt':
        return version_tuple(platform.win32_ver()[1])
    else:
        return ()
def write_string(s, out=None, encoding=None):
    """Write *s* to *out* (default stderr), encoding manually for byte streams."""
    assert isinstance(s, str)
    out = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not out:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        # Keep ANSI sequences intact across line breaks on Windows terminals
        s = re.sub(r'([\r\n]+)', r' \1', s)

    enc, buffer = None, out
    if 'b' in getattr(out, 'mode', ''):
        # Binary stream: we must encode ourselves
        enc = encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        # Text stream: write encoded bytes to the underlying buffer
        buffer = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()

    buffer.write(s.encode(enc, 'ignore') if enc else s)
    buffer.flush()
# TODO: Use global logger
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """Emit *msg* once per process: via printer/stderr in CLI mode, else warnings.warn."""
    from .. import _IN_CLI
    if _IN_CLI:
        # De-duplicate: each message is shown at most once
        if msg in deprecation_warning._cache:
            return
        deprecation_warning._cache.add(msg)
        if printer:
            return printer(f'{msg}{bug_reports_message()}', **kwargs)
        return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
    else:
        # +3 to point past this helper and its internal frames
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)


# Set of messages already emitted (CLI mode only)
deprecation_warning._cache = set()
def bytes_to_intlist(bs):
    """Turn a bytes-like (or str) value into a list of integer code points."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(ch) for ch in bs]
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values (0-255) into a bytes object."""
    return struct.pack('%dB' % len(xs), *xs) if xs else b''
class LockingUnsupportedError(OSError):
    # Raised by the fallback _lock_file/_unlock_file when the platform has
    # neither Windows LockFileEx nor POSIX fcntl
    msg = 'File locking is not supported'

    def __init__(self):
        super().__init__(self.msg)
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Win32 OVERLAPPED struct required by LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.WinDLL('kernel32')
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file (byte range 0 .. 0x7fffffff_ffffffff)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the pointer alive on the file object for the later unlock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        # 0x2 = LOCKFILE_EXCLUSIVE_LOCK, 0x1 = LOCKFILE_FAIL_IMMEDIATELY
        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            with contextlib.suppress(OSError):
                return fcntl.flock(f, fcntl.LOCK_UN)
            with contextlib.suppress(OSError):
                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking

    except ImportError:

        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError()

        def _unlock_file(f):
            raise LockingUnsupportedError()
    def __init__(self, filename, mode, block=True, encoding=None):
        # Only plain read/append/write modes are supported (no '+', 'x')
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
    def __enter__(self):
        # Read-only modes take a shared lock; anything else is exclusive
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            # Truncation is deferred until after the lock is held so
            # a concurrent reader never sees a half-truncated file
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    raise
        return self

    def unlock(self):
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()
    def __getattr__(self, attr):
        # Delegate everything else (read, write, seek, ...) to the wrapped file
        return getattr(self.f, attr)
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
def shell_quote(args):
    """Shell-quote each argument and join them into a single command string."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(compat_shlex_quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = urllib.parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url; returns (url, data-or-default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """ Formats numbers with decimal sufixes like K, M, etc """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    # log() picks the suffix; clamp so huge numbers reuse the largest one
    exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        # Binary units use the IEC 'i' infix (KiB, MiB, ...)
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
def format_bytes(bytes):
    # NOTE: the parameter shadows the builtin `bytes`; kept for interface compatibility
    return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
def lookup_unit_table(unit_table, s, strict=False):
    """Parse '<number> <unit>' from *s* using *unit_table* multipliers; None if no match."""
    # In lenient mode also accept ',' as the decimal separator
    num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = (re.fullmatch if strict else re.match)(
        rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None

    num = float(m.group('num').replace(',', '.'))
    mult = unit_table[m.group('unit')]
    return round(num * mult)
1698 """Parse a string indicating a byte quantity into an integer"""
1699 return lookup_unit_table(
1700 {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])}
,
1701 s
.upper(), strict
=True)
def parse_filesize(s):
    """Parse a human-readable file size ('10.5MiB', '5 GB', ...) into bytes."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
def parse_count(s):
    """Parse a view/like count like '1.2M views' into an integer."""
    if s is None:
        return None

    # Drop any non-numeric prefix (e.g. 'views: ')
    s = re.sub(r'^[^\d]+\s', '', s).strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
        'b': 1000 ** 3,
        'B': 1000 ** 3,
    }

    ret = lookup_unit_table(_UNIT_TABLE, s)
    if ret is not None:
        return ret

    # Fall back to the first bare number followed by end-of-string/space
    mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
    if mobj:
        return str_to_int(mobj.group(1))
def parse_resolution(s, *, lenient=False):
    """Extract width/height from a resolution string ('1920x1080', '720p', '4k')."""
    if s is None:
        return {}

    if lenient:
        pair = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
    else:
        # Strict mode requires the dimensions not to be glued to other alphanumerics
        pair = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if pair:
        return {'width': int(pair.group('w')), 'height': int(pair.group('h'))}

    # '720p' / '1080i' style: height only
    scanline = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scanline:
        return {'height': int(scanline.group(1))}

    # '4k' / '8k' marketing names map to 2160 / 4320 lines
    k_notation = re.search(r'\b([48])[kK]\b', s)
    if k_notation:
        return {'height': int(k_notation.group(1)) * 540}

    return {}
def parse_bitrate(s):
    """Return the bitrate in kbps from a string like '128 kbps', else None."""
    if not isinstance(s, str):
        return None
    match = re.search(r'\b(\d+)\s*kbps', s)
    return int(match.group(1)) if match else None
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Unknown languages fall back to the English month list
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # The lookahead skips ampersands that already start a valid entity
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
def setproctitle(title):
    """Best-effort: set the process name via libc prctl(PR_SET_NAME) on Linux."""
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode()
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present; None passes through."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip *end* from the end of *s* when present; None passes through."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one matching pair of single or double quotes from both ends of *s*."""
    if s is None or len(s) < 2:
        return s
    for q in ('"', "'"):
        if s[0] == q and s[-1] == q:
            return s[1:-1]
    return s
def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    # netloc minus a leading 'www.'; empty netloc collapses to None
    return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
def url_basename(url):
    """Return the last path component of *url* ('' when the path is empty)."""
    parsed_path = urllib.parse.urlparse(url).path
    components = parsed_path.strip('/').split('/')
    return components[-1]
def base_url(url):
    # Everything up to and including the last '/' before any query/fragment
    return re.match(r'https?://[^?#]+/', url).group()
def urljoin(base, path):
    """Join *base* and *path*, tolerating bytes input; None for unusable arguments."""
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    # Path is already absolute (has a scheme or is scheme-relative)
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """int(v) * invscale // scale, or *default* when conversion is impossible."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        result = int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
    return result
def str_or_none(v, default=None):
    """Stringify *v*, passing *default* through when v is None."""
    if v is None:
        return default
    return str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, int):
        return int_str
    elif isinstance(int_str, str):
        # Drop thousands separators and '+' before conversion
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """float(v) * invscale / scale, or *default* when v is None or unconvertible."""
    if v is None:
        return default
    try:
        converted = float(v)
    except (ValueError, TypeError):
        return default
    return converted * invscale / scale
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Strip whitespace from a string; non-strings yield *default*."""
    if isinstance(v, str):
        return v.strip()
    return default
def url_or_none(url):
    """Return the stripped URL when it uses a supported protocol, else None."""
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
    # http(s), rtmp(t)(e|s)/rtmfp/rtsp(s|u), mms, ftp(s), or scheme-relative
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format a unix timestamp or YYYYMMDD string with *date_format*; *default* on failure."""
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Also, datetime.datetime.fromtimestamp breaks for negative timestamps
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
            datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
                               + datetime.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
def parse_duration(s):
    """Parse a duration string ('1:02:03', '1h2m3s', '3 min', ...) into seconds."""
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated form: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if m:
        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        # Unit-suffixed / ISO-8601-like form (years..weeks are matched but ignored)
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Fractional 'X hours' / 'X minutes' form
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    if ms:
        # ':' can be a fraction separator in the colon form
        ms = ms.replace(':', '.')
    return sum(float(part or 0) * mult for part, mult in (
        (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension: 'a.mp4' + 'temp' -> 'a.temp.mp4'.

    When *expected_real_ext* is given and does not match the file's actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)

    # FIX: the fallback previously produced the literal '(unknown).<ext>',
    # discarding the original filename; it must keep the filename intact.
    return (
        f'{name}.{ext}{real_ext}'
        if not expected_real_ext or real_ext[1:] == expected_real_ext
        else f'{filename}.{ext}')
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file extension with *ext*; append when the real extension
    differs from *expected_real_ext* (so nothing meaningful is lost)."""
    name, real_ext = os.path.splitext(filename)
    keep_whole_name = expected_real_ext and real_ext[1:] != expected_real_ext
    base = filename if keep_whole_name else name
    return f'{base}.{ext}'
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE: mutable default `args` is never mutated here, so it is safe
    try:
        Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    return exe
def _get_exe_version_output(exe, args):
    """Run *exe* with *args* and return its combined stdout/stderr text, or False."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True,
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if ret:
            return None
    except OSError:
        return False
    return stdout
2103 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
2104 assert isinstance(output
, str)
2105 if version_re
is None:
2106 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
2107 m
= re
.search(version_re
, output
)
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    out = _get_exe_version_output(exe, args)
    if out is None:
        # Executable exists but exited with an error
        return unrecognized[-1]
    # out is False (not found) or version text
    return out and detect_exe_version(out, version_re, unrecognized[0])
def frange(start=0, stop=None, step=1):
    """Generator equivalent of range() that also supports float arguments."""
    if stop is None:
        # Single-argument form: frange(stop)
        start, stop = 0, start
    # Iteration direction; a zero step produces an empty sequence
    direction = (1 if step > 0 else -1) if step else 0
    while direction * start < direction * stop:
        yield start
        start += step
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        # Distinguishes "LazyList index out of range" from a generic IndexError
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        self._iterable = iter(iterable)
        # Items consumed from the iterable so far (shared by copies/reversals)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        # Map a forward index to the equivalent index from the end
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Pull only as many items as needed to satisfy the index/slice
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
    class IndexError(IndexError):
        # Raised when an index lands past the final page
        pass

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        # pagefunc(pagenum) must return an iterable of entries for that page
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        # Unknown until a short/failing page reveals the real page count
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}
    def getpage(self, pagenum):
        # Serve from cache when possible; pages beyond the known count are empty
        page_results = self._cache.get(pagenum)
        if page_results is None:
            page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results
    def getslice(self, start=0, end=None):
        # Materialize the lazy slice produced by the subclass
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')
    def __getitem__(self, idx):
        # Repeated indexing would re-fetch pages without the cache
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if not entries:
            raise self.IndexError()
        return entries[0]
    def __bool__(self):
        # Truthy iff at least one entry exists
        return bool(self.getslice(0, 1))
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested slice within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                # Remember that this page (and later ones) are unavailable
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
        # Entries to drop from the first page
        skip_elems = start - start_page * self._pagesize
        # Remaining number of entries to yield (None = unbounded)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
class PlaylistEntries:
    # Sentinel marking a requested entry that was not provided
    MissingEntry = object()
    is_exhausted = False

    def __init__(self, ydl, info_dict):
        self.ydl = ydl

        # _entries must be assigned now since infodict can change during iteration
        entries = info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        requested_entries = info_dict.get('requested_entries')
        self.is_incomplete = requested_entries is not None
        if self.is_incomplete:
            assert self.is_exhausted
            self._entries = [self.MissingEntry] * max(requested_entries or [0])
            for i, entry in zip(requested_entries, entries):
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
        else:
            self._entries = LazyList(entries)

    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Yield int indices / slices for a comma-separated items spec."""
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')
            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            if int_or_none(step) == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)

    def get_requested_items(self):
        playlist_items = self.ydl.params.get('playlist_items')
        playlist_start = self.ydl.params.get('playliststart', 1)
        playlist_end = self.ydl.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if not entry:
                    continue
                try:
                    # The item may have just been added to archive. Don't break due to it
                    if not self.ydl.params.get('lazy_playlist'):
                        # TODO: Add auto-generated fields
                        self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    return

    def get_full_count(self):
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        elif isinstance(self._entries, InAdvancePagedList):
            if self._entries._pagesize == 1:
                return self._entries._pagecount

    @functools.cached_property
    def _getter(self):
        # Returns a callable fetching entry i, raising self.IndexError past the end
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    entry = self.MissingEntry
                    if not self.is_incomplete:
                        raise self.IndexError()
                if entry is self.MissingEntry:
                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
                return entry
            return get_entry

        def get_entry(i):
            try:
                return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
            except (LazyList.IndexError, PagedList.IndexError):
                raise self.IndexError()
        return get_entry

    def __getitem__(self, idx):
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        stop += [-1, 1][step > 0]

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                self.is_exhausted = True
                if step > 0:
                    break
                continue
            yield i, entry

    def __len__(self):
        return len(tuple(self[:]))

    class IndexError(IndexError):
        pass
def uppercase_escape(s):
    """Expand literal ``\\UXXXXXXXX`` escapes in *s* to their characters."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Expand literal ``\\uXXXX`` escapes in *s* to their characters."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def parse_qs(url, **kwargs):
    """Parse the query string of *url* into a dict of value lists."""
    query = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query, **kwargs)
def read_batch_urls(batch_fd):
    """Read URLs from the open file-like *batch_fd*, skipping comments.

    Closes the file object; returns a list of cleaned, non-empty URLs.
    """
    def fixup(url):
        if not isinstance(url, str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes for POST bodies."""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url(url, *, query_update=None, **kwargs):
    """Replace URL components specified by kwargs
    @param url           str or parse url tuple
    @param query_update  update query
    @returns             modified URL
    """
    if isinstance(url, str):
        # Fast path: nothing to change
        if not kwargs and not query_update:
            return url
        url = urllib.parse.urlparse(url)
    if query_update:
        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
        kwargs['query'] = urllib.parse.urlencode({
            **urllib.parse.parse_qs(url.query),
            **query_update,
        }, True)
    return urllib.parse.urlunparse(url._replace(**kwargs))
def update_url_query(url, query):
    """Merge *query* into the query string of *url*."""
    return update_url(url, query_update=query)
2538 def _multipart_encode_impl(data
, boundary
):
2539 content_type
= 'multipart/form-data; boundary=%s' % boundary
2542 for k
, v
in data
.items():
2543 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
2544 if isinstance(k
, str):
2546 if isinstance(v
, str):
2548 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2549 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2550 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
2551 if boundary
.encode('ascii') in content
:
2552 raise ValueError('Boundary overlaps with data')
2555 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
2557 return out
, content_type
def multipart_encode(data, boundary=None):
    """
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    """
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            # A caller-supplied boundary that collides is a hard error;
            # a random one is simply regenerated.
            if has_specified_boundary:
                raise
            boundary = None

    return out, content_type
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """True if *x* is iterable but not a str/bytes/mapping (by default)."""
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
def variadic(x, allowed_types=NO_DEFAULT):
    """Wrap *x* in a tuple unless it is already an iterable collection."""
    if not isinstance(allowed_types, (tuple, type)):
        deprecation_warning('allowed_types should be a tuple or a type')
        allowed_types = tuple(allowed_types)
    return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn; return the first acceptable result.

    Common lookup/arithmetic exceptions are swallowed; a result is
    acceptable when it matches *expected_type* (or no type is given).
    Returns None when every call fails or is rejected.
    """
    for f in funcs:
        try:
            val = f(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(val, expected_type):
            return val
def try_get(src, getter, expected_type=None):
    """Apply one or more getters to *src*, returning the first valid result."""
    return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict keeping only items for which cndn(key, value) holds."""
    result = {}
    for key, value in dct.items():
        if cndn(key, value):
            result[key] = value
    return result
def merge_dicts(*dicts):
    """Merge dicts left-to-right; earlier non-None values win,
    except that an empty string may be replaced by a later string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if (v is not None and k not in merged
                    or isinstance(v, str) and merged[k] == ''):
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Coerce *string* to str, decoding bytes-like input with *encoding*."""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
2645 TV_PARENTAL_GUIDELINES
= {
def parse_age_limit(s):
    """Normalize an age-limit value (int, "18+", US/TV rating) to an int or None."""
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        return s if 0 <= s <= 21 else None
    elif not isinstance(s, str):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP wrapper, leaving only the callback payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
def js_to_json(code, vars={}, *, strict=False):
    # vars is a dict of var, val pairs to substitute
    STRING_QUOTES = '\'"`'
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )

    def process_escape(match):
        # Keep escapes JSON understands; translate/drop the rest
        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
        escape = match.group(1) or match.group(2)

        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
                else R'\u00' if escape == 'x'
                else '' if escape == '\n'
                else escape)

    def template_substitute(match):
        evaluated = js_to_json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"':
            return json.loads(evaluated)
        return evaluated

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ''

        if v[0] in STRING_QUOTES:
            # Template literals get ${...} interpolation before unquoting
            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
            return f'"{escaped}"'

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                return f'"{i}":' if v.endswith(':') else str(i)

        if v in vars:
            try:
                json.loads(vars[v])
            except json.JSONDecodeError:
                return json.dumps(vars[v])
            else:
                return vars[v]

        if not strict:
            return f'"{v}"'

        raise ValueError(f'Unknown value: {v}')

    def create_map(mobj):
        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

    code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)

    return re.sub(rf'''(?sx)
        {STRING_RE}|
        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
        [0-9]+(?={SKIP_RE}:)|
        !+
        ''', fix_kv, code)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality; unknown ids rank lowest
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q
2774 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
2778 'default': '%(title)s [%(id)s].%(ext)s',
2779 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
2785 'description': 'description',
2786 'annotation': 'annotations.xml',
2787 'infojson': 'info.json',
2790 'pl_thumbnail': None,
2791 'pl_description': 'description',
2792 'pl_infojson': 'info.json',
2795 # As of [1] format syntax is:
2796 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
2797 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
2798 STR_FORMAT_RE_TMPL = r'''(?x)
2799 (?<!%)(?P<prefix>(?:%%)*)
2801 (?P<has_key>\((?P<key>{0})\))?
2803 (?P<conversion>[#0\-+ ]+)?
2805 (?P<precision>\.\d+)?
2806 (?P<len_mod>[hlL])? # unused in python
2807 {1} # conversion type
2812 STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(int(piece) for piece in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
    """Compare *version* against *limit*; unparsable input defers to *assume_new*."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Imported lazily to avoid a circular import at module load time
    from ..update import is_non_updateable

    return not is_non_updateable()
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(compat_shlex_quote(a) for a in args)
def error_to_str(err):
    """Format an exception as 'TypeName: message'."""
    return f'{type(err).__name__}: {err}'
2855 def mimetype2ext(mt, default=NO_DEFAULT):
2856 if not isinstance(mt, str):
2857 if default is not NO_DEFAULT:
2874 'x-matroska': 'mkv',
2876 'x-mp4-fragmented': 'mp4',
2881 # application (streaming playlists)
2885 'vnd.apple.mpegurl': 'm3u8',
2886 'vnd.ms-sstr+xml': 'ism',
2887 'x-mpegurl': 'm3u8',
2891 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
2892 # Using .mp3 as it's the most popular one
2893 'audio/mpeg': 'mp3',
2894 'audio/webm': 'webm',
2895 'audio/x-matroska': 'mka',
2896 'audio/x-mpegurl': 'm3u',
2904 'x-realaudio': 'ra',
2915 'vnd.wap.wbmp': 'wbmp',
2922 'filmstrip+json': 'fs',
2923 'smptett+xml': 'tt',
2926 'x-ms-sami': 'sami',
2935 mimetype = mt.partition(';')[0].strip().lower()
2936 _, _, subtype = mimetype.rpartition('/')
2938 ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
2941 elif default is not NO_DEFAULT:
2943 return subtype.replace('+', '.')
def ext2mimetype(ext_or_url):
    """Guess the MIME type from a file extension or URL."""
    if not ext_or_url:
        return None
    # A bare extension needs a dummy filename for guess_type
    if '.' not in ext_or_url:
        ext_or_url = f'file.{ext_or_url}'
    return mimetypes.guess_type(ext_or_url)[0]
def parse_codecs(codecs_str):
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, scodec, hdr = None, None, None, None
    for full_codec in split_codecs:
        # Strip leading zeroes from each dotted part before classifying
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                        'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            if vcodec:
                continue
            vcodec = full_codec
            if parts[0] in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
                          'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        elif parts[0] in ('stpp', 'wvtt'):
            scodec = scodec or full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')
    if vcodec or acodec or scodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
            **({'scodec': scodec} if scodec is not None else {}),
        }
    elif len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose a container extension compatible with the given codec/ext lists."""
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a', 'ac-4',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    sanitize_codec = functools.partial(
        try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    for ext in preferences or COMPATIBLE_CODECS.keys():
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm', 'weba'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
    """Guess a file extension from a URL handle's response headers."""
    getheader = url_handle.headers.get

    # 1. Content-Disposition filename
    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    # 2. S3-style metadata name
    meta_ext = getheader('x-amz-meta-name')
    if meta_ext:
        e = meta_ext.rpartition('.')[2]
        if e:
            return e

    # 3. Fall back to the Content-Type mapping
    return mimetype2ext(getheader('Content-Type'), default=default)
def encode_data_uri(data, mime_type):
    """Build a base64 data: URI for *data* with the given MIME type."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
# List of known byte-order-marks (BOM). Longer marks first so that the
# UTF-32 BOMs are not mistaken for their UTF-16 prefixes.
BOMS = [
    (b'\xef\xbb\xbf', 'utf-8'),
    (b'\x00\x00\xfe\xff', 'utf-32-be'),
    (b'\xff\xfe\x00\x00', 'utf-32-le'),
    (b'\xff\xfe', 'utf-16-le'),
    (b'\xfe\xff', 'utf-16-be'),
]
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    encoding = 'utf-8'
    for bom, enc in BOMS:
        while first_bytes.startswith(bom):
            encoding, first_bytes = enc, first_bytes[len(bom):]

    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
def determine_protocol(info_dict):
    """Infer the download protocol from an info dict's protocol/url fields."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        # Live HLS cannot use the native downloader
        return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
    elif ext == 'f4m':
        return 'f4m'

    return urllib.parse.urlparse(url).scheme
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned """
    def width(string):
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]

    max_lens = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, max_lens)
    data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
def _match_one(filter_part, dct, incomplete):
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    if isinstance(incomplete, bool):
        is_incomplete = lambda _: incomplete
    else:
        is_incomplete = lambda k: k in incomplete

    operator_rex = re.compile(r'''(?x)
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.fullmatch(filter_part.strip())
    if m:
        unnegated_op = COMPARISON_OPERATORS[m['op']]
        if m['negation']:
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
        if m['quote']:
            comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
        actual_value = dct.get(m['key'])
        numeric_comparison = None
        if isinstance(actual_value, (int, float)):
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082)
            try:
                numeric_comparison = int(comparison_value)
            except ValueError:
                numeric_comparison = parse_filesize(comparison_value)
                if numeric_comparison is None:
                    numeric_comparison = parse_filesize(f'{comparison_value}B')
                if numeric_comparison is None:
                    numeric_comparison = parse_duration(comparison_value)
        if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
            raise ValueError('Operator %s only supports string values!' % m['op'])
        if actual_value is None:
            return is_incomplete(m['key']) or m['none_inclusive']
        return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.fullmatch(filter_part.strip())
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if is_incomplete(m.group('key')) and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.
    @returns           Whether the filter passes
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    return all(
        _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
        for filter_part in re.split(r'(?<!\\)&', filter_str))
def match_filter_func(filters, breaking_filters=None):
    """Build a match-filter callable from filter strings; None if no filters."""
    if not filters and not breaking_filters:
        return None
    breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
    filters = set(variadic(filters or []))

    # A lone '-' enables interactive prompting instead of auto-accept
    interactive = '-' in filters
    if interactive:
        filters.remove('-')

    def _match_func(info_dict, incomplete=False):
        ret = breaking_filters(info_dict, incomplete)
        if ret is not None:
            raise RejectedVideoReached(ret)

        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            return NO_DEFAULT if interactive and not incomplete else None
        else:
            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping..'
    return _match_func
class download_range_func:
    """Callable yielding chapter/time-range dicts to download for a video."""

    def __init__(self, chapters, ranges, from_info=False):
        self.chapters, self.ranges, self.from_info = chapters, ranges, from_info

    def __call__(self, info_dict, ydl):

        warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
                   else 'Cannot match chapters since chapter information is unavailable')
        for regex in self.chapters or []:
            for i, chapter in enumerate(info_dict.get('chapters') or []):
                if re.search(regex, chapter['title']):
                    warning = None
                    yield {**chapter, 'index': i}
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        for start, end in self.ranges or []:
            yield {
                'start_time': self._handle_negative_timestamp(start, info_dict),
                'end_time': self._handle_negative_timestamp(end, info_dict),
            }

        if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
            yield {
                'start_time': info_dict.get('start_time') or 0,
                'end_time': info_dict.get('end_time') or float('inf'),
            }
        elif not self.ranges and not self.chapters:
            yield {}

    @staticmethod
    def _handle_negative_timestamp(time, info):
        # Negative timestamps count back from the end of the video
        return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time

    def __eq__(self, other):
        return (isinstance(other, download_range_func)
                and self.chapters == other.chapters and self.ranges == other.ranges)

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges})'
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression to seconds (float), or None."""
    if not time_expr:
        return

    # Plain offset, optionally suffixed with 's'
    mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time H:MM:SS(.fff or :frames)
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format *seconds* as an SRT timecode (HH:MM:SS,mmm)."""
    return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
def ass_subtitles_timecode(seconds):
    """Format *seconds* as an ASS timecode (H:MM:SS.cc, centiseconds)."""
    time = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
def dfxp2srt(dfxp_data):
    """Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    """
    # Legacy TTAF namespaces are rewritten to their modern TTML equivalents
    # below, so one set of XPath expressions handles every document variant.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Only these tts:* properties are translated into SRT <font>/<b>/<i>/<u> markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration',
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser:
        # Accumulated output text and per-element open-tag/style bookkeeping
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # skip properties already in effect on the enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                applied_style = {}
                if self._applied_styles:
                    applied_style.update(self._applied_styles[-1])
                applied_style.update(style)
                self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
    # This will not trigger false positives since only UTF-8 text is being replaced
    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')

    def parse_node(node):
        # Re-serialize the <p> subtree and run it through TTMLPElementParser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    repeat = False
    while True:
        # Resolve style inheritance; loop again while a parent style has not
        # been processed yet (styles may reference later-declared parents).
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    for p in ('body', 'div'):
        # A style on <body>/<div> becomes the default for every paragraph
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param, separator=None):
    """Build the CLI argument list for one option value looked up in *params*.

    Returns [] when the value is missing/None; ['--opt', 'value'] without a
    separator; or ['--opt<sep>value'] as a single argument with one.
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a tri-state (True/False/None) param as a CLI option via cli_option()."""
    flag = params.get(param)
    assert flag in (True, False, None)
    # Map the boolean onto its textual value; None falls through to [] inside cli_option
    return cli_option({True: true_value, False: false_value}, command_option, flag, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Resolve the first matching argument list from *argdict* for the given keys.

    Each entry in *keys* may be a single key or a tuple of keys; the first
    entry with any non-None value wins and its lists are flattened together.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict if use_compat else None

    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        candidates = [argdict.get(key.lower()) for key in variadic(key_list)]
        found = [args for args in candidates if args is not None]
        if found:
            return list(itertools.chain.from_iterable(found))
    return default
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Build the lookup-key list for *exe* under *main_key* and delegate to cli_configuration_args()."""
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    lookup_keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in lookup_keys:
        # A suffixed-only lookup is exact: no fallbacks, no compat behaviour
        use_compat = False
    else:
        if main_key != exe:
            lookup_keys.append((main_key, exe))
        lookup_keys.append('default')
    return cli_configuration_args(argdict, lookup_keys, default, use_compat)
3545 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3604 'iw': 'heb', # Replaced by he in 1989 revision
3614 'in': 'ind', # Replaced by id in 1989 revision
3730 'ji': 'yid', # Replaced by yi in 1989 revision
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are used, so region variants such as
        # 'en-US' resolve via their primary subtag
        return cls._lang_map.get(code[:2])
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the short->long map; implicitly returns None
        # when the 3-letter code is not present
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
3751 # From http://data.okfn.org/data/core/country-list
3753 'AF': 'Afghanistan',
3754 'AX': 'Åland Islands',
3757 'AS': 'American Samoa',
3762 'AG': 'Antigua and Barbuda',
3779 'BO': 'Bolivia, Plurinational State of',
3780 'BQ': 'Bonaire, Sint Eustatius and Saba',
3781 'BA': 'Bosnia and Herzegovina',
3783 'BV': 'Bouvet Island',
3785 'IO': 'British Indian Ocean Territory',
3786 'BN': 'Brunei Darussalam',
3788 'BF': 'Burkina Faso',
3794 'KY': 'Cayman Islands',
3795 'CF': 'Central African Republic',
3799 'CX': 'Christmas Island',
3800 'CC': 'Cocos (Keeling) Islands',
3804 'CD': 'Congo, the Democratic Republic of the',
3805 'CK': 'Cook Islands',
3807 'CI': 'Côte d\'Ivoire',
3812 'CZ': 'Czech Republic',
3816 'DO': 'Dominican Republic',
3819 'SV': 'El Salvador',
3820 'GQ': 'Equatorial Guinea',
3824 'FK': 'Falkland Islands (Malvinas)',
3825 'FO': 'Faroe Islands',
3829 'GF': 'French Guiana',
3830 'PF': 'French Polynesia',
3831 'TF': 'French Southern Territories',
3846 'GW': 'Guinea-Bissau',
3849 'HM': 'Heard Island and McDonald Islands',
3850 'VA': 'Holy See (Vatican City State)',
3857 'IR': 'Iran, Islamic Republic of',
3860 'IM': 'Isle of Man',
3870 'KP': 'Korea, Democratic People\'s Republic of',
3871 'KR': 'Korea, Republic of',
3874 'LA': 'Lao People\'s Democratic Republic',
3880 'LI': 'Liechtenstein',
3884 'MK': 'Macedonia, the Former Yugoslav Republic of',
3891 'MH': 'Marshall Islands',
3897 'FM': 'Micronesia, Federated States of',
3898 'MD': 'Moldova, Republic of',
3909 'NL': 'Netherlands',
3910 'NC': 'New Caledonia',
3911 'NZ': 'New Zealand',
3916 'NF': 'Norfolk Island',
3917 'MP': 'Northern Mariana Islands',
3922 'PS': 'Palestine, State of',
3924 'PG': 'Papua New Guinea',
3927 'PH': 'Philippines',
3931 'PR': 'Puerto Rico',
3935 'RU': 'Russian Federation',
3937 'BL': 'Saint Barthélemy',
3938 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
3939 'KN': 'Saint Kitts and Nevis',
3940 'LC': 'Saint Lucia',
3941 'MF': 'Saint Martin (French part)',
3942 'PM': 'Saint Pierre and Miquelon',
3943 'VC': 'Saint Vincent and the Grenadines',
3946 'ST': 'Sao Tome and Principe',
3947 'SA': 'Saudi Arabia',
3951 'SL': 'Sierra Leone',
3953 'SX': 'Sint Maarten (Dutch part)',
3956 'SB': 'Solomon Islands',
3958 'ZA': 'South Africa',
3959 'GS': 'South Georgia and the South Sandwich Islands',
3960 'SS': 'South Sudan',
3965 'SJ': 'Svalbard and Jan Mayen',
3968 'CH': 'Switzerland',
3969 'SY': 'Syrian Arab Republic',
3970 'TW': 'Taiwan, Province of China',
3972 'TZ': 'Tanzania, United Republic of',
3974 'TL': 'Timor-Leste',
3978 'TT': 'Trinidad and Tobago',
3981 'TM': 'Turkmenistan',
3982 'TC': 'Turks and Caicos Islands',
3986 'AE': 'United Arab Emirates',
3987 'GB': 'United Kingdom',
3988 'US': 'United States',
3989 'UM': 'United States Minor Outlying Islands',
3993 'VE': 'Venezuela, Bolivarian Republic of',
3995 'VG': 'Virgin Islands, British',
3996 'VI': 'Virgin Islands, U.S.',
3997 'WF': 'Wallis and Futuna',
3998 'EH': 'Western Sahara',
4002 # Not ISO 3166 codes, but used for IP blocks
4003 'AP': 'Asia/Pacific Region',
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive: the map keys are upper-case two-letter codes
        return cls._country_map.get(code.upper())
4014 # Major IPv4 address blocks per country
4016 'AD': '46.172.224.0/19',
4017 'AE': '94.200.0.0/13',
4018 'AF': '149.54.0.0/17',
4019 'AG': '209.59.64.0/18',
4020 'AI': '204.14.248.0/21',
4021 'AL': '46.99.0.0/16',
4022 'AM': '46.70.0.0/15',
4023 'AO': '105.168.0.0/13',
4024 'AP': '182.50.184.0/21',
4025 'AQ': '23.154.160.0/24',
4026 'AR': '181.0.0.0/12',
4027 'AS': '202.70.112.0/20',
4028 'AT': '77.116.0.0/14',
4029 'AU': '1.128.0.0/11',
4030 'AW': '181.41.0.0/18',
4031 'AX': '185.217.4.0/22',
4032 'AZ': '5.197.0.0/16',
4033 'BA': '31.176.128.0/17',
4034 'BB': '65.48.128.0/17',
4035 'BD': '114.130.0.0/16',
4037 'BF': '102.178.0.0/15',
4038 'BG': '95.42.0.0/15',
4039 'BH': '37.131.0.0/17',
4040 'BI': '154.117.192.0/18',
4041 'BJ': '137.255.0.0/16',
4042 'BL': '185.212.72.0/23',
4043 'BM': '196.12.64.0/18',
4044 'BN': '156.31.0.0/16',
4045 'BO': '161.56.0.0/16',
4046 'BQ': '161.0.80.0/20',
4047 'BR': '191.128.0.0/12',
4048 'BS': '24.51.64.0/18',
4049 'BT': '119.2.96.0/19',
4050 'BW': '168.167.0.0/16',
4051 'BY': '178.120.0.0/13',
4052 'BZ': '179.42.192.0/18',
4053 'CA': '99.224.0.0/11',
4054 'CD': '41.243.0.0/16',
4055 'CF': '197.242.176.0/21',
4056 'CG': '160.113.0.0/16',
4057 'CH': '85.0.0.0/13',
4058 'CI': '102.136.0.0/14',
4059 'CK': '202.65.32.0/19',
4060 'CL': '152.172.0.0/14',
4061 'CM': '102.244.0.0/14',
4062 'CN': '36.128.0.0/10',
4063 'CO': '181.240.0.0/12',
4064 'CR': '201.192.0.0/12',
4065 'CU': '152.206.0.0/15',
4066 'CV': '165.90.96.0/19',
4067 'CW': '190.88.128.0/17',
4068 'CY': '31.153.0.0/16',
4069 'CZ': '88.100.0.0/14',
4071 'DJ': '197.241.0.0/17',
4072 'DK': '87.48.0.0/12',
4073 'DM': '192.243.48.0/20',
4074 'DO': '152.166.0.0/15',
4075 'DZ': '41.96.0.0/12',
4076 'EC': '186.68.0.0/15',
4077 'EE': '90.190.0.0/15',
4078 'EG': '156.160.0.0/11',
4079 'ER': '196.200.96.0/20',
4080 'ES': '88.0.0.0/11',
4081 'ET': '196.188.0.0/14',
4082 'EU': '2.16.0.0/13',
4083 'FI': '91.152.0.0/13',
4084 'FJ': '144.120.0.0/16',
4085 'FK': '80.73.208.0/21',
4086 'FM': '119.252.112.0/20',
4087 'FO': '88.85.32.0/19',
4089 'GA': '41.158.0.0/15',
4091 'GD': '74.122.88.0/21',
4092 'GE': '31.146.0.0/16',
4093 'GF': '161.22.64.0/18',
4094 'GG': '62.68.160.0/19',
4095 'GH': '154.160.0.0/12',
4096 'GI': '95.164.0.0/16',
4097 'GL': '88.83.0.0/19',
4098 'GM': '160.182.0.0/15',
4099 'GN': '197.149.192.0/18',
4100 'GP': '104.250.0.0/19',
4101 'GQ': '105.235.224.0/20',
4102 'GR': '94.64.0.0/13',
4103 'GT': '168.234.0.0/16',
4104 'GU': '168.123.0.0/16',
4105 'GW': '197.214.80.0/20',
4106 'GY': '181.41.64.0/18',
4107 'HK': '113.252.0.0/14',
4108 'HN': '181.210.0.0/16',
4109 'HR': '93.136.0.0/13',
4110 'HT': '148.102.128.0/17',
4111 'HU': '84.0.0.0/14',
4112 'ID': '39.192.0.0/10',
4113 'IE': '87.32.0.0/12',
4114 'IL': '79.176.0.0/13',
4115 'IM': '5.62.80.0/20',
4116 'IN': '117.192.0.0/10',
4117 'IO': '203.83.48.0/21',
4118 'IQ': '37.236.0.0/14',
4119 'IR': '2.176.0.0/12',
4120 'IS': '82.221.0.0/16',
4121 'IT': '79.0.0.0/10',
4122 'JE': '87.244.64.0/18',
4123 'JM': '72.27.0.0/17',
4124 'JO': '176.29.0.0/16',
4125 'JP': '133.0.0.0/8',
4126 'KE': '105.48.0.0/12',
4127 'KG': '158.181.128.0/17',
4128 'KH': '36.37.128.0/17',
4129 'KI': '103.25.140.0/22',
4130 'KM': '197.255.224.0/20',
4131 'KN': '198.167.192.0/19',
4132 'KP': '175.45.176.0/22',
4133 'KR': '175.192.0.0/10',
4134 'KW': '37.36.0.0/14',
4135 'KY': '64.96.0.0/15',
4136 'KZ': '2.72.0.0/13',
4137 'LA': '115.84.64.0/18',
4138 'LB': '178.135.0.0/16',
4139 'LC': '24.92.144.0/20',
4140 'LI': '82.117.0.0/19',
4141 'LK': '112.134.0.0/15',
4142 'LR': '102.183.0.0/16',
4143 'LS': '129.232.0.0/17',
4144 'LT': '78.56.0.0/13',
4145 'LU': '188.42.0.0/16',
4146 'LV': '46.109.0.0/16',
4147 'LY': '41.252.0.0/14',
4148 'MA': '105.128.0.0/11',
4149 'MC': '88.209.64.0/18',
4150 'MD': '37.246.0.0/16',
4151 'ME': '178.175.0.0/17',
4152 'MF': '74.112.232.0/21',
4153 'MG': '154.126.0.0/17',
4154 'MH': '117.103.88.0/21',
4155 'MK': '77.28.0.0/15',
4156 'ML': '154.118.128.0/18',
4157 'MM': '37.111.0.0/17',
4158 'MN': '49.0.128.0/17',
4159 'MO': '60.246.0.0/16',
4160 'MP': '202.88.64.0/20',
4161 'MQ': '109.203.224.0/19',
4162 'MR': '41.188.64.0/18',
4163 'MS': '208.90.112.0/22',
4164 'MT': '46.11.0.0/16',
4165 'MU': '105.16.0.0/12',
4166 'MV': '27.114.128.0/18',
4167 'MW': '102.70.0.0/15',
4168 'MX': '187.192.0.0/11',
4169 'MY': '175.136.0.0/13',
4170 'MZ': '197.218.0.0/15',
4171 'NA': '41.182.0.0/16',
4172 'NC': '101.101.0.0/18',
4173 'NE': '197.214.0.0/18',
4174 'NF': '203.17.240.0/22',
4175 'NG': '105.112.0.0/12',
4176 'NI': '186.76.0.0/15',
4177 'NL': '145.96.0.0/11',
4178 'NO': '84.208.0.0/13',
4179 'NP': '36.252.0.0/15',
4180 'NR': '203.98.224.0/19',
4181 'NU': '49.156.48.0/22',
4182 'NZ': '49.224.0.0/14',
4183 'OM': '5.36.0.0/15',
4184 'PA': '186.72.0.0/15',
4185 'PE': '186.160.0.0/14',
4186 'PF': '123.50.64.0/18',
4187 'PG': '124.240.192.0/19',
4188 'PH': '49.144.0.0/13',
4189 'PK': '39.32.0.0/11',
4190 'PL': '83.0.0.0/11',
4191 'PM': '70.36.0.0/20',
4192 'PR': '66.50.0.0/16',
4193 'PS': '188.161.0.0/16',
4194 'PT': '85.240.0.0/13',
4195 'PW': '202.124.224.0/20',
4196 'PY': '181.120.0.0/14',
4197 'QA': '37.210.0.0/15',
4198 'RE': '102.35.0.0/16',
4199 'RO': '79.112.0.0/13',
4200 'RS': '93.86.0.0/15',
4201 'RU': '5.136.0.0/13',
4202 'RW': '41.186.0.0/16',
4203 'SA': '188.48.0.0/13',
4204 'SB': '202.1.160.0/19',
4205 'SC': '154.192.0.0/11',
4206 'SD': '102.120.0.0/13',
4207 'SE': '78.64.0.0/12',
4208 'SG': '8.128.0.0/10',
4209 'SI': '188.196.0.0/14',
4210 'SK': '78.98.0.0/15',
4211 'SL': '102.143.0.0/17',
4212 'SM': '89.186.32.0/19',
4213 'SN': '41.82.0.0/15',
4214 'SO': '154.115.192.0/18',
4215 'SR': '186.179.128.0/17',
4216 'SS': '105.235.208.0/21',
4217 'ST': '197.159.160.0/19',
4218 'SV': '168.243.0.0/16',
4219 'SX': '190.102.0.0/20',
4221 'SZ': '41.84.224.0/19',
4222 'TC': '65.255.48.0/20',
4223 'TD': '154.68.128.0/19',
4224 'TG': '196.168.0.0/14',
4225 'TH': '171.96.0.0/13',
4226 'TJ': '85.9.128.0/18',
4227 'TK': '27.96.24.0/21',
4228 'TL': '180.189.160.0/20',
4229 'TM': '95.85.96.0/19',
4230 'TN': '197.0.0.0/11',
4231 'TO': '175.176.144.0/21',
4232 'TR': '78.160.0.0/11',
4233 'TT': '186.44.0.0/15',
4234 'TV': '202.2.96.0/19',
4235 'TW': '120.96.0.0/11',
4236 'TZ': '156.156.0.0/14',
4237 'UA': '37.52.0.0/14',
4238 'UG': '102.80.0.0/13',
4240 'UY': '167.56.0.0/13',
4241 'UZ': '84.54.64.0/18',
4242 'VA': '212.77.0.0/19',
4243 'VC': '207.191.240.0/21',
4244 'VE': '186.88.0.0/13',
4245 'VG': '66.81.192.0/20',
4246 'VI': '146.226.0.0/16',
4247 'VN': '14.160.0.0/11',
4248 'VU': '202.80.32.0/20',
4249 'WF': '117.20.32.0/21',
4250 'WS': '202.4.32.0/19',
4251 'YE': '134.35.0.0/16',
4252 'YT': '41.242.116.0/22',
4253 'ZA': '41.0.0.0/11',
4254 'ZM': '102.144.0.0/13',
4255 'ZW': '102.177.192.0/18',
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (string) inside the given block.

        *code_or_block* is either a two-letter country code (looked up in
        _country_ip_map) or an explicit CIDR block like '1.2.3.0/24'.
        Returns None for an unknown country code.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Convert the dotted base address to a 32-bit integer, then pick
        # uniformly within the prefix by OR-ing in a random host part
        addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return str(socket.inet_ntoa(
            struct.pack('!L', random.randint(addr_min, addr_max))))
4273 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4274 # released into Public Domain
4275 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n < 0:
        # non-positive values encode as a single zero byte, as before
        n = 0
    nbytes = max(1, (n.bit_length() + 7) // 8)
    if blocksize > 0 and nbytes % blocksize:
        # round up to the next multiple of blocksize (front-padded with NULs)
        nbytes += blocksize - nbytes % blocksize
    return n.to_bytes(nbytes, 'big')
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes performs the same big-endian accumulation that the
    # manual 32-bit-chunk loop used to do, including b'' -> 0.
    return int.from_bytes(s, 'big')
def ohdave_rsa_encrypt(data, exponent, modulus):
    """Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The payload is interpreted little-endian: reverse the bytes, then
    # parse the hex representation as a big-endian integer
    plaintext_int = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(plaintext_int, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 §7.2.1: the padding string PS must consist of *nonzero* octets,
    # because the zero byte terminates the padding. The previous
    # randint(0, 254) could emit zeros, truncating the message on unpadding.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
4356 def _base_n_table(n
, table
):
4357 if not table
and not n
:
4358 raise ValueError('Either table or n must be specified')
4359 table
= (table
or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n
]
4361 if n
and n
!= len(table
):
4362 raise ValueError(f
'base {n} exceeds table length {len(table)}')
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string"""
    digits = _base_n_table(n, table)
    if not num:
        return digits[0]

    base = len(digits)
    chunks = []
    while num:
        num, remainder = divmod(num, base)
        chunks.append(digits[remainder])
    # Digits were produced least-significant first
    return ''.join(reversed(chunks))
def decode_base_n(string, n=None, table=None):
    """Convert given base-n string to int"""
    index_of = {char: index for index, char in enumerate(_base_n_table(n, table))}
    base = len(index_of)
    value = 0
    for char in string:
        value = value * base + index_of[char]
    return value
def decode_packed_codes(code):
    # Unpack JavaScript obfuscated with Dean Edwards' "p.a.c.k.e.r": the
    # packed source replaces every identifier with its base-n index into a
    # '|'-separated symbol list.
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        # Empty symbol entries fall back to the base-n token itself
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Shift every character of *s* found in *alphabet* by *shift* positions, wrapping around; other characters pass through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            shifted.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            shifted.append(ch)
    return ''.join(shifted)
4416 return caesar(s
, r
'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="quoted"') into a dict, stripping quotes."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        # Quoted values may themselves contain commas; drop the quotes here
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript `>>>` semantics)."""
    if val >= 0:
        return val >> n
    # Map the negative value into the unsigned 32-bit range first
    return (val + 0x100000000) >> n
def write_xattr(path, key, value):
    """Write extended attribute *key* = *value* (bytes) on file *path*.

    Tries, in order: NTFS Alternate Data Streams on Windows, the
    os.setxattr / pyxattr / xattr Python APIs, then the setfattr/xattr
    command-line tools. Raises XAttrMetadataError on write failure and
    XAttrUnavailableError when no backend is available.
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules

    setxattr = None
    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The command-line tools take the value as text, not bytes
    value = value.decode()
    try:
        _, stderr, returncode = Popen.run(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict of stringified fields under the given key names."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
def find_available_port(interface=''):
    """Return a currently-free TCP port number on *interface*, or None when binding fails."""
    try:
        with socket.socket() as sock:
            # Port 0 asks the OS to assign a free ephemeral port
            sock.bind((interface, 0))
            return sock.getsockname()[1]
    except OSError:
        return None
4506 # Templates for internet shortcut files, which are plain text files.
4507 DOT_URL_LINK_TEMPLATE
= '''\
4512 DOT_WEBLOC_LINK_TEMPLATE
= '''\
4513 <?xml version="1.0" encoding="UTF-8"?>
4514 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4515 <plist version="1.0">
4518 \t<string>%(url)s</string>
4523 DOT_DESKTOP_LINK_TEMPLATE
= '''\
4533 'url': DOT_URL_LINK_TEMPLATE
,
4534 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
4535 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = urllib.parse.urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped regardless of scheme — presumably
    # intentional for the HTTP-centric callers here; confirm before reuse.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return urllib.parse.urlunparse(
        (iri_parts.scheme,
            net_location,

            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """On Windows, prefix the absolute path with \\\\?\\ to bypass MAX_PATH; elsewhere return *path* unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # Work around MAX_PATH limitation on Windows. The maximum allowed length
    # for the individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """Traverse *obj* by *field* and interpolate the value into *template*.

    Returns *default* when the traversed value is falsy (or, if *ignore* is
    given, when it is one of the ignored values); otherwise
    ``template % func(value)``.
    """
    val = traversal.traverse_obj(obj, *variadic(field))
    if not val if ignore is NO_DEFAULT else val in variadic(ignore):
        return default
    return template % func(val)
def clean_podcast_url(url):
    """Strip known podcast analytics/tracking redirect prefixes from *url*."""
    url = re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com|
                chrt\.fm/track|
                mgln\.ai/e
            )(?:/[^/.]+)?|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e|
            [0-9]\.gum\.fm|
            pscrb\.fm/rss/p
        )/''', '', url)
    # Removing a tracker prefix can leave a doubled scheme ('https://https://…');
    # keep only the inner one
    return re.sub(r'^\w+://(\w+://)', r'\1', url)
# Lowercase hexadecimal digits, indexed 0-15 (used by random_uuidv4)
_HEX_TABLE = '0123456789abcdef'
def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    The 'x' placeholders take any hex digit; the 'y' placeholder is the
    variant nibble and must be one of 8, 9, a or b per RFC 4122 §4.4
    (previously it was filled with an arbitrary hex digit, producing
    non-conformant UUIDs for 12 of 16 possible values).
    """
    def _fill(match):
        return random.choice('0123456789abcdef' if match.group(0) == 'x' else '89ab')
    return re.sub(r'[xy]', _fill, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Create the parent directory of *path* (like ``mkdir -p``).

    Returns True on success (or when *path* has no directory component),
    False when creation fails with OSError. On failure the error is
    reported through *to_screen* when one is given.
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # BUG FIX: was `if callable(to_screen) is not None:` — callable()
        # returns a bool, so that test was always true and the default
        # to_screen=None raised TypeError instead of returning False.
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
def get_executable_path():
    """Return the directory containing the running program/binary."""
    # Imported here rather than at module top — presumably to avoid an
    # import cycle with ..update; confirm before hoisting.
    from ..update import _get_variant_and_executable_path

    return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
def get_user_config_dirs(package_name):
    """Yield the per-user configuration directories for *package_name*."""
    # .config (e.g. ~/.config/package_name)
    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    yield os.path.join(xdg_config_home, package_name)

    # appdata (%APPDATA%/package_name)
    appdata_dir = os.getenv('appdata')
    # Fix: %APPDATA% is unset outside Windows; joining None would raise TypeError
    if appdata_dir:
        yield os.path.join(appdata_dir, package_name)

    # home (~/.package_name)
    yield os.path.join(compat_expanduser('~'), f'.{package_name}')
def get_system_config_dirs(package_name):
    """Yield the system-wide configuration directories for *package_name*."""
    # /etc (e.g. /etc/package_name)
    etc_dir = os.path.join('/etc', package_name)
    yield etc_dir
def time_seconds(**kwargs):
    """Return the current time in seconds since the epoch (1970-01-01T00:00:00Z),
    shifted by the datetime.timedelta built from the keyword arguments
    (e.g. hours=9 for a UTC+9 wall-clock reading).
    """
    offset = datetime.timedelta(**kwargs).total_seconds()
    return time.time() + offset
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create an HS256-signed token in JWS Compact Serialization (bytes).

    payload_data: JSON-serializable claims mapping
    key: shared secret (str)
    headers: optional extra JOSE header fields merged over the defaults
    """
    # Fix: the original used a shared mutable default (`headers={}`);
    # None keeps the interface backward-compatible without that hazard
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
    return token
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Return the payload dict of a JWT. The signature is NOT verified."""
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
    # Fix: the decoded payload was computed but never returned
    return payload_data
# Tri-state flag: None when not running on Windows ('nt'); on Windows it starts
# as False and is switched to True once VT (terminal escape) processing is enabled
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
4702 def supports_terminal_sequences(stream
):
4703 if compat_os_name
== 'nt':
4704 if not WINDOWS_VT_MODE
:
4706 elif not os
.getenv('TERM'):
4709 return stream
.isatty()
4710 except BaseException
:
4714 def windows_enable_vt_mode():
4715 """Ref: https://bugs.python.org/issue30075 """
4716 if get_windows_version() < (10, 0, 10586):
4720 import ctypes
.wintypes
4723 ENABLE_VIRTUAL_TERMINAL_PROCESSING
= 0x0004
4725 dll
= ctypes
.WinDLL('kernel32', use_last_error
=False)
4726 handle
= os
.open('CONOUT$', os
.O_RDWR
)
4728 h_out
= ctypes
.wintypes
.HANDLE(msvcrt
.get_osfhandle(handle
))
4729 dw_original_mode
= ctypes
.wintypes
.DWORD()
4730 success
= dll
.GetConsoleMode(h_out
, ctypes
.byref(dw_original_mode
))
4732 raise Exception('GetConsoleMode failed')
4734 success
= dll
.SetConsoleMode(h_out
, ctypes
.wintypes
.DWORD(
4735 dw_original_mode
.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING
))
4737 raise Exception('SetConsoleMode failed')
4741 global WINDOWS_VT_MODE
4742 WINDOWS_VT_MODE
= True
4743 supports_terminal_sequences
.cache_clear()
# Matches ANSI SGR escape sequences such as "\033[0;31m"
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Strip ANSI SGR (color/style) escape sequences from *string*."""
    cleaned = _terminal_sequences_re.sub('', string)
    return cleaned
def number_of_digits(number):
    """Return the length of *number* rendered via '%d' (a minus sign counts)."""
    rendered = '%d' % number
    return len(rendered)
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the truthy *values* with *delim*, converting each to str.

    When *from_dict* is given, each value is instead treated as a key/path
    to traverse into that mapping.
    """
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
    return delim.join(str(value) for value in values if value)
4763 def scale_thumbnails_to_max_format_width(formats
, thumbnails
, url_width_re
):
4765 Find the largest format dimensions in terms of video width and, for each thumbnail:
4766 * Modify the URL: Match the width with the provided regex and replace with the former width
4769 This function is useful with video services that scale the provided thumbnails on demand
4771 _keys
= ('width', 'height')
4772 max_dimensions
= max(
4773 (tuple(format
.get(k
) or 0 for k
in _keys
) for format
in formats
),
4775 if not max_dimensions
[0]:
4779 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}
,
4780 dict(zip(_keys
, max_dimensions
)), thumbnail
)
4781 for thumbnail
in thumbnails
def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
    # NB: the parameter name shadows the builtin `range`, but it is part of
    # the public interface and must stay
    if not range:
        return None, None, None
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if not crg:
        # Fix: both early returns were left unconditional; restore the guards
        return None, None, None
    return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
def read_stdin(what):
    """Read all of STDIN, first printing an EOF hint when *what* is given."""
    if what:
        eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
        write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    # Fix: the collected input was never returned
    return sys.stdin.read()
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @returns (encoding, bytes to skip)
    """
    # A BOM at the start of the data wins over any in-band declaration
    for bom, enc in BOMS:
        if data.startswith(bom):
            return enc, len(bom)

    # Drop every NUL byte so a `# coding: ...` declaration still matches when
    # the text is UTF-16/UTF-32 encoded; endianness is deliberately ignored
    stripped = data.replace(b'\0', b'')
    mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if mobj:
        return mobj.group(1).decode(), 0
    return None, 0
# Set once init() has run; init() asserts on it to forbid double initialization
__initialized = False
def __init__(self, parser, label=None):
    """Bind the option parser; start with no loaded paths and no sub-configs."""
    self.parser = parser
    self.label = label
    self._loaded_paths = set()
    self.configs = []
def init(self, args=None, filename=None):
    """Record the raw arguments and their source file, then load nested configs."""
    assert not self.__initialized
    self.own_args = args
    self.filename = filename
    return self.load_configs()
4835 def load_configs(self
):
4838 location
= os
.path
.realpath(self
.filename
)
4839 directory
= os
.path
.dirname(location
)
4840 if location
in self
._loaded
_paths
:
4842 self
._loaded
_paths
.add(location
)
4844 self
.__initialized
= True
4845 opts
, _
= self
.parser
.parse_known_args(self
.own_args
)
4846 self
.parsed_args
= self
.own_args
4847 for location
in opts
.config_locations
or []:
4849 if location
in self
._loaded
_paths
:
4851 self
._loaded
_paths
.add(location
)
4852 self
.append_config(shlex
.split(read_stdin('options'), comments
=True), label
='stdin')
4854 location
= os
.path
.join(directory
, expand_path(location
))
4855 if os
.path
.isdir(location
):
4856 location
= os
.path
.join(location
, 'yt-dlp.conf')
4857 if not os
.path
.exists(location
):
4858 self
.parser
.error(f
'config location {location} does not exist')
4859 self
.append_config(self
.read_file(location
), location
)
4863 label
= join_nonempty(
4864 self
.label
, 'config', f
'"{self.filename}"' if self
.filename
else '',
4866 return join_nonempty(
4867 self
.own_args
is not None and f
'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
4868 *(f
'\n{c}'.replace('\n', '\n| ')[1:] for c
in self
.configs
),
@staticmethod
def read_file(filename, default=[]):
    """Read a config file and split its contents into an argument list.

    Returns *default* when the file cannot be opened; raises ValueError
    when the contents cannot be parsed.
    NB: the `default=[]` mutable default is kept for interface compatibility;
    it is only returned, never mutated here.
    """
    try:
        optionf = open(filename, 'rb')
    except OSError:
        return default  # silently skip if file is not present
    try:
        enc, skip = determine_file_encoding(optionf.read(512))
        optionf.seek(skip, io.SEEK_SET)
    except OSError:
        enc = None  # silently skip read errors
    try:
        # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
        contents = optionf.read().decode(enc or preferredencoding())
        res = shlex.split(contents, comments=True)
    except Exception as err:
        # Fix: the message hard-coded "(unknown)" instead of naming the file
        raise ValueError(f'Unable to parse "{filename}": {err}')
    finally:
        optionf.close()
    return res
4893 def hide_login_info(opts
):
4894 PRIVATE_OPTS
= {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
4895 eqre
= re
.compile('^(?P<key>' + ('|'.join(re
.escape(po
) for po
in PRIVATE_OPTS
)) + ')=.+$')
4900 return m
.group('key') + '=PRIVATE'
4904 opts
= list(map(_scrub_eq
, opts
))
4905 for idx
, opt
in enumerate(opts
):
4906 if opt
in PRIVATE_OPTS
and idx
+ 1 < len(opts
):
4907 opts
[idx
+ 1] = 'PRIVATE'
def append_config(self, *args, label=None):
    """Create a child config sharing our loaded-path set; keep it if it loads."""
    child = type(self)(self.parser, label)
    # share the dedup set so the same file is never loaded twice
    child._loaded_paths = self._loaded_paths
    if child.init(*args):
        self.configs.append(child)
4918 for config
in reversed(self
.configs
):
4919 yield from config
.all_args
4920 yield from self
.parsed_args
or []
def parse_known_args(self, **kwargs):
    """Leniently parse the flattened argument list with the underlying parser."""
    flattened = self.all_args
    return self.parser.parse_known_args(flattened, **kwargs)
def parse_args(self):
    """Strictly parse the flattened argument list with the underlying parser."""
    flattened = self.all_args
    return self.parser.parse_args(flattened)
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    merged = {}
    for headers in dicts:
        for key, value in headers.items():
            # .title() normalizes the case so later dicts override earlier ones
            merged[key.title()] = value
    return merged
def cached_method(f):
    """Cache a method

    Results are stored per-instance (in vars(self)) keyed by the bound
    argument values, so equal calls — positional or keyword — share one entry.
    """
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        bound_args = signature.bind(self, *args, **kwargs)
        bound_args.apply_defaults()
        # drop `self` from the key; defaults are applied so f(x) == f(x, y=default)
        key = tuple(bound_args.arguments.values())[1:]

        cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
        if key not in cache:
            cache[key] = f(self, *args, **kwargs)
        # Fix: the cached value was never returned
        return cache[key]
    # Fix: the decorator never returned the wrapper, leaving methods as None
    return wrapper
class classproperty:
    """property access for class methods with optional caching"""

    def __new__(cls, func=None, *args, **kwargs):
        # Supports both bare use (@classproperty) and parametrized use
        # (@classproperty(cache=True)): with no function yet, hand back a
        # partial that will receive the function on the next call.
        # __init__ is skipped automatically because the partial is not an
        # instance of this class.
        if not func:
            return functools.partial(cls, *args, **kwargs)
        return super().__new__(cls)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        # Fix: func must be stored — __get__ reads self.func
        self.func = func
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        if self._cache is None:
            return self.func(cls)
        elif cls not in self._cache:
            # cache per accessing class, so subclasses get their own value
            self._cache[cls] = self.func(cls)
        return self._cache[cls]
class function_with_repr:
    """Wrap a callable so repr() shows a custom string or its qualified name."""

    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func, self.__repr = func, repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    def __repr__(self):
        # Fix: restore the __repr__ header and custom-repr branch around the
        # dangling qualified-name return
        if self.__repr:
            return self.__repr
        return f'{self.func.__module__}.{self.func.__qualname__}'
class Namespace(types.SimpleNamespace):
    """Immutable namespace"""

    def __iter__(self):
        # Fix: restore the __iter__ header around its orphaned body;
        # iterating a Namespace yields its attribute VALUES
        return iter(self.__dict__.values())

    @property
    def items_(self):
        # trailing underscore avoids clashing with a potential `items` attribute
        return self.__dict__.items()
# Known media-related file extensions, grouped by role
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# The "common" groups are also members of the general video/audio groups
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5014 for retry in RetryManager(...):
5017 except SomeException as err:
5021 attempt
, _error
= 0, None
def __init__(self, _retries, _error_callback, **kwargs):
    """Store the retry budget and pre-bind extra kwargs into the error reporter."""
    self.retries = _retries or 0
    self.error_callback = functools.partial(_error_callback, **kwargs)
def _should_retry(self):
    """True while an error is pending and the retry budget is not exhausted."""
    if self._error is NO_DEFAULT:
        return False
    return self.attempt <= self.retries
5032 if self
._error
is NO_DEFAULT
:
5037 def error(self
, value
):
5041 while self
._should
_retry
():
5042 self
.error
= NO_DEFAULT
5046 self
.error_callback(self
.error
, self
.attempt
, self
.retries
)
5049 def report_retry(e
, count
, retries
, *, sleep_func
, info
, warn
, error
=None, suffix
=None):
5050 """Utility function for reporting retries"""
5053 return error(f
'{e}. Giving up after {count - 1} retries') if count
> 1 else error(str(e
))
5058 elif isinstance(e
, ExtractorError
):
5059 e
= remove_end(str_or_none(e
.cause
) or e
.orig_msg
, '.')
5060 warn(f
'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5062 delay
= float_or_none(sleep_func(n
=count
- 1)) if callable(sleep_func
) else sleep_func
5064 info(f
'Sleeping {delay:.2f} seconds ...')
def make_archive_id(ie, video_id):
    """Build a download-archive entry from an extractor (or its key) and a video id."""
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
def truncate_string(s, left, right=0):
    """Shorten *s* to at most *left* + *right* characters, eliding the middle.

    Keeps `left - 3` leading characters, an ellipsis, and *right* trailing
    characters. None and already-short strings are returned unchanged.
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        # Fix: short/None input must be returned as-is, not sliced
        return s
    return f'{s[:left - 3]}...{s[-right:] if right else ""}'
5080 def orderedSet_from_options(options
, alias_dict
, *, use_regex
=False, start
=None):
5081 assert 'all' in alias_dict
, '"all" alias is required'
5082 requested
= list(start
or [])
5084 discard
= val
.startswith('-')
5088 if val
in alias_dict
:
5089 val
= alias_dict
[val
] if not discard
else [
5090 i
[1:] if i
.startswith('-') else f
'-{i}' for i
in alias_dict
[val
]]
5091 # NB: Do not allow regex in aliases for performance
5092 requested
= orderedSet_from_options(val
, alias_dict
, start
=requested
)
5095 current
= (filter(re
.compile(val
, re
.I
).fullmatch
, alias_dict
['all']) if use_regex
5096 else [val
] if val
in alias_dict
['all'] else None)
5098 raise ValueError(val
)
5101 for item
in current
:
5102 while item
in requested
:
5103 requested
.remove(item
)
5105 requested
.extend(current
)
5107 return orderedSet(requested
)
5112 regex
= r
' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
5114 default
= ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
5115 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
5116 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
5117 ytdl_default
= ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
5118 'height', 'width', 'proto', 'vext', 'abr', 'aext',
5119 'fps', 'fs_approx', 'source', 'id')
5122 'vcodec': {'type': 'ordered', 'regex': True,
5123 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
5124 'acodec': {'type': 'ordered', 'regex': True,
5125 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
5126 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
5127 'order': ['dv', '(hdr)?12', r
'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
5128 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
5129 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
5130 'vext': {'type': 'ordered', 'field': 'video_ext',
5131 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
5132 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
5133 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
5134 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
5135 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
5136 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}
,
5137 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
5138 'field': ('vcodec', 'acodec'),
5139 'function': lambda it
: int(any(v
!= 'none' for v
in it
))},
5140 'ie_pref': {'priority': True, 'type': 'extractor'}
,
5141 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}
,
5142 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}
,
5143 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}
,
5144 'quality': {'convert': 'float', 'default': -1}
,
5145 'filesize': {'convert': 'bytes'}
,
5146 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}
,
5147 'id': {'convert': 'string', 'field': 'format_id'}
,
5148 'height': {'convert': 'float_none'}
,
5149 'width': {'convert': 'float_none'}
,
5150 'fps': {'convert': 'float_none'}
,
5151 'channels': {'convert': 'float_none', 'field': 'audio_channels'}
,
5152 'tbr': {'convert': 'float_none'}
,
5153 'vbr': {'convert': 'float_none'}
,
5154 'abr': {'convert': 'float_none'}
,
5155 'asr': {'convert': 'float_none'}
,
5156 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}
,
5158 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}
,
5159 'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
5160 'function': lambda it
: next(filter(None, it
), None)},
5161 'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
5162 'function': lambda it
: next(filter(None, it
), None)},
5163 'ext': {'type': 'combined', 'field': ('vext', 'aext')}
,
5164 'res': {'type': 'multiple', 'field': ('height', 'width'),
5165 'function': lambda it
: (lambda l
: min(l
) if l
else 0)(tuple(filter(None, it
)))},
5167 # Actual field names
5168 'format_id': {'type': 'alias', 'field': 'id'}
,
5169 'preference': {'type': 'alias', 'field': 'ie_pref'}
,
5170 'language_preference': {'type': 'alias', 'field': 'lang'}
,
5171 'source_preference': {'type': 'alias', 'field': 'source'}
,
5172 'protocol': {'type': 'alias', 'field': 'proto'}
,
5173 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}
,
5174 'audio_channels': {'type': 'alias', 'field': 'channels'}
,
5177 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}
,
5178 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}
,
5179 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}
,
5180 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}
,
5181 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}
,
5182 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}
,
5183 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}
,
5184 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}
,
5185 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}
,
5186 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}
,
5187 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}
,
5188 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}
,
5189 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}
,
5190 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}
,
5191 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}
,
5192 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}
,
5193 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}
,
5194 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}
,
5195 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}
,
5196 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}
,
def __init__(self, ydl, field_preference):
    """Bind the YoutubeDL instance and evaluate the sort order immediately."""
    # Fix: self.ydl and self._order are read below/by other methods but were
    # never assigned in the visible body
    self.ydl = ydl
    self._order = []
    self.evaluate_params(self.ydl.params, field_preference)
    if ydl.params.get('verbose'):
        self.print_verbose_info(self.ydl.write_debug)
5206 def _get_field_setting(self
, field
, key
):
5207 if field
not in self
.settings
:
5208 if key
in ('forced', 'priority'):
5210 self
.ydl
.deprecated_feature(f
'Using arbitrary fields ({field}) for format sorting is '
5211 'deprecated and may be removed in a future version')
5212 self
.settings
[field
] = {}
5213 propObj
= self
.settings
[field
]
5214 if key
not in propObj
:
5215 type = propObj
.get('type')
5217 default
= 'preference' if type == 'extractor' else (field
,) if type in ('combined', 'multiple') else field
5218 elif key
== 'convert':
5219 default
= 'order' if type == 'ordered' else 'float_string' if field
else 'ignore'
5221 default
= {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}
.get(key
, None)
5222 propObj
[key
] = default
5225 def _resolve_field_value(self
, field
, value
, convertNone
=False):
5230 value
= value
.lower()
5231 conversion
= self
._get
_field
_setting
(field
, 'convert')
5232 if conversion
== 'ignore':
5234 if conversion
== 'string':
5236 elif conversion
== 'float_none':
5237 return float_or_none(value
)
5238 elif conversion
== 'bytes':
5239 return parse_bytes(value
)
5240 elif conversion
== 'order':
5241 order_list
= (self
._use
_free
_order
and self
._get
_field
_setting
(field
, 'order_free')) or self
._get
_field
_setting
(field
, 'order')
5242 use_regex
= self
._get
_field
_setting
(field
, 'regex')
5243 list_length
= len(order_list
)
5244 empty_pos
= order_list
.index('') if '' in order_list
else list_length
+ 1
5245 if use_regex
and value
is not None:
5246 for i
, regex
in enumerate(order_list
):
5247 if regex
and re
.match(regex
, value
):
5248 return list_length
- i
5249 return list_length
- empty_pos
# not in list
5250 else: # not regex or value = None
5251 return list_length
- (order_list
.index(value
) if value
in order_list
else empty_pos
)
5253 if value
.isnumeric():
5256 self
.settings
[field
]['convert'] = 'string'
5259 def evaluate_params(self
, params
, sort_extractor
):
5260 self
._use
_free
_order
= params
.get('prefer_free_formats', False)
5261 self
._sort
_user
= params
.get('format_sort', [])
5262 self
._sort
_extractor
= sort_extractor
5264 def add_item(field
, reverse
, closest
, limit_text
):
5265 field
= field
.lower()
5266 if field
in self
._order
:
5268 self
._order
.append(field
)
5269 limit
= self
._resolve
_field
_value
(field
, limit_text
)
5272 'closest': False if limit
is None else closest
,
5273 'limit_text': limit_text
,
5275 if field
in self
.settings
:
5276 self
.settings
[field
].update(data
)
5278 self
.settings
[field
] = data
5281 tuple(field
for field
in self
.default
if self
._get
_field
_setting
(field
, 'forced'))
5282 + (tuple() if params
.get('format_sort_force', False)
5283 else tuple(field
for field
in self
.default
if self
._get
_field
_setting
(field
, 'priority')))
5284 + tuple(self
._sort
_user
) + tuple(sort_extractor
) + self
.default
)
5286 for item
in sort_list
:
5287 match
= re
.match(self
.regex
, item
)
5289 raise ExtractorError('Invalid format sort string "%s" given by extractor' % item
)
5290 field
= match
.group('field')
5293 if self
._get
_field
_setting
(field
, 'type') == 'alias':
5294 alias
, field
= field
, self
._get
_field
_setting
(field
, 'field')
5295 if self
._get
_field
_setting
(alias
, 'deprecated'):
5296 self
.ydl
.deprecated_feature(f
'Format sorting alias {alias} is deprecated and may '
5297 f
'be removed in a future version. Please use {field} instead')
5298 reverse
= match
.group('reverse') is not None
5299 closest
= match
.group('separator') == '~'
5300 limit_text
= match
.group('limit')
5302 has_limit
= limit_text
is not None
5303 has_multiple_fields
= self
._get
_field
_setting
(field
, 'type') == 'combined'
5304 has_multiple_limits
= has_limit
and has_multiple_fields
and not self
._get
_field
_setting
(field
, 'same_limit')
5306 fields
= self
._get
_field
_setting
(field
, 'field') if has_multiple_fields
else (field
,)
5307 limits
= limit_text
.split(':') if has_multiple_limits
else (limit_text
,) if has_limit
else tuple()
5308 limit_count
= len(limits
)
5309 for (i
, f
) in enumerate(fields
):
5310 add_item(f
, reverse
, closest
,
5311 limits
[i
] if i
< limit_count
5312 else limits
[0] if has_limit
and not has_multiple_limits
5315 def print_verbose_info(self
, write_debug
):
5317 write_debug('Sort order given by user: %s' % ', '.join(self
._sort
_user
))
5318 if self
._sort
_extractor
:
5319 write_debug('Sort order given by extractor: %s' % ', '.join(self
._sort
_extractor
))
5320 write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
5321 '+' if self
._get
_field
_setting
(field
, 'reverse') else '', field
,
5322 '%s%s(%s)' % ('~' if self
._get
_field
_setting
(field
, 'closest') else ':',
5323 self
._get
_field
_setting
(field
, 'limit_text'),
5324 self
._get
_field
_setting
(field
, 'limit'))
5325 if self
._get
_field
_setting
(field
, 'limit_text') is not None else '')
5326 for field
in self
._order
if self
._get
_field
_setting
(field
, 'visible')]))
5328 def _calculate_field_preference_from_value(self
, format
, field
, type, value
):
5329 reverse
= self
._get
_field
_setting
(field
, 'reverse')
5330 closest
= self
._get
_field
_setting
(field
, 'closest')
5331 limit
= self
._get
_field
_setting
(field
, 'limit')
5333 if type == 'extractor':
5334 maximum
= self
._get
_field
_setting
(field
, 'max')
5335 if value
is None or (maximum
is not None and value
>= maximum
):
5337 elif type == 'boolean':
5338 in_list
= self
._get
_field
_setting
(field
, 'in_list')
5339 not_in_list
= self
._get
_field
_setting
(field
, 'not_in_list')
5340 value
= 0 if ((in_list
is None or value
in in_list
) and (not_in_list
is None or value
not in not_in_list
)) else -1
5341 elif type == 'ordered':
5342 value
= self
._resolve
_field
_value
(field
, value
, True)
5344 # try to convert to number
5345 val_num
= float_or_none(value
, default
=self
._get
_field
_setting
(field
, 'default'))
5346 is_num
= self
._get
_field
_setting
(field
, 'convert') != 'string' and val_num
is not None
5350 return ((-10, 0) if value
is None
5351 else (1, value
, 0) if not is_num
# if a field has mixed strings and numbers, strings are sorted higher
5352 else (0, -abs(value
- limit
), value
- limit
if reverse
else limit
- value
) if closest
5353 else (0, value
, 0) if not reverse
and (limit
is None or value
<= limit
)
5354 else (0, -value
, 0) if limit
is None or (reverse
and value
== limit
) or value
> limit
5355 else (-1, value
, 0))
5357 def _calculate_field_preference(self
, format
, field
):
5358 type = self
._get
_field
_setting
(field
, 'type') # extractor, boolean, ordered, field, multiple
5359 get_value
= lambda f
: format
.get(self
._get
_field
_setting
(f
, 'field'))
5360 if type == 'multiple':
5361 type = 'field' # Only 'field' is allowed in multiple for now
5362 actual_fields
= self
._get
_field
_setting
(field
, 'field')
5364 value
= self
._get
_field
_setting
(field
, 'function')(get_value(f
) for f
in actual_fields
)
5366 value
= get_value(field
)
5367 return self
._calculate
_field
_preference
_from
_value
(format
, field
, type, value
)
5369 def calculate_preference(self
, format
):
5370 # Determine missing protocol
5371 if not format
.get('protocol'):
5372 format
['protocol'] = determine_protocol(format
)
5374 # Determine missing ext
5375 if not format
.get('ext') and 'url' in format
:
5376 format
['ext'] = determine_ext(format
['url'])
5377 if format
.get('vcodec') == 'none':
5378 format
['audio_ext'] = format
['ext'] if format
.get('acodec') != 'none' else 'none'
5379 format
['video_ext'] = 'none'
5381 format
['video_ext'] = format
['ext']
5382 format
['audio_ext'] = 'none'
5383 # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
5384 # format['preference'] = -1000
5386 if format
.get('preference') is None and format
.get('ext') == 'flv' and re
.match('[hx]265|he?vc?', format
.get('vcodec') or ''):
5387 # HEVC-over-FLV is out-of-spec by FLV's original spec
5388 # ref. https://trac.ffmpeg.org/ticket/6389
5389 # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
5390 format
['preference'] = -100
5392 # Determine missing bitrates
5393 if format
.get('vcodec') == 'none':
5395 if format
.get('acodec') == 'none':
5397 if not format
.get('vbr') and format
.get('vcodec') != 'none':
5398 format
['vbr'] = try_call(lambda: format
['tbr'] - format
['abr']) or None
5399 if not format
.get('abr') and format
.get('acodec') != 'none':
5400 format
['abr'] = try_call(lambda: format
['tbr'] - format
['vbr']) or None
5401 if not format
.get('tbr'):
5402 format
['tbr'] = try_call(lambda: format
['vbr'] + format
['abr']) or None
5404 return tuple(self
._calculate
_field
_preference
(format
, field
) for field
in self
._order
)
def __init__(self, ydl=None):
    """Remember the (optional) YoutubeDL instance to forward messages to."""
    # Fix: sibling methods read self._ydl, but the visible body never set it
    self._ydl = ydl
def debug(self, message):
    """Forward *message* to the attached ydl's debug log, if any."""
    # Fix: guard against the ydl=None default of __init__
    if self._ydl:
        self._ydl.write_debug(message)
def info(self, message):
    """Forward *message* to the attached ydl's screen output, if any."""
    # Fix: guard against the ydl=None default of __init__
    if self._ydl:
        self._ydl.to_screen(message)
def warning(self, message, *, once=False):
    """Forward a warning to the attached ydl, if any; *once* dedups repeats."""
    # Fix: guard against the ydl=None default of __init__
    if self._ydl:
        self._ydl.report_warning(message, once)
def error(self, message, *, is_error=True):
    """Forward an error message to the attached ydl, if any."""
    # Fix: guard against the ydl=None default of __init__
    if self._ydl:
        self._ydl.report_error(message, is_error=is_error)
def stdout(self, message):
    """Forward *message* to the attached ydl's stdout, if any."""
    # Fix: guard against the ydl=None default of __init__
    if self._ydl:
        self._ydl.to_stdout(message)
5432 def stderr(self
, message
):
5434 self
._ydl
.to_stderr(message
)