43 import xml
.etree
.ElementTree
45 from . import traversal
47 from ..compat
import functools
# isort: split
48 from ..compat
import (
49 compat_etree_fromstring
,
51 compat_HTMLParseError
,
55 from ..dependencies
import xattr
57 __name__
= __name__
.rsplit('.', 1)[0] # Pretend to be the parent module
59 # This is not clearly defined otherwise
60 compiled_regex_type
= type(re
.compile(''))
71 ENGLISH_MONTH_NAMES
= [
72 'January', 'February', 'March', 'April', 'May', 'June',
73 'July', 'August', 'September', 'October', 'November', 'December']
76 'en': ENGLISH_MONTH_NAMES
,
78 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
79 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
80 # these follow the genitive grammatical case (dopełniacz)
81 # some websites might be using nominative, which will require another month list
82 # https://en.wikibooks.org/wiki/Polish/Noun_cases
83 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
84 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
87 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
89 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
90 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
91 'EST': -5, 'EDT': -4, # Eastern
92 'CST': -6, 'CDT': -5, # Central
93 'MST': -7, 'MDT': -6, # Mountain
94 'PST': -8, 'PDT': -7 # Pacific
97 # needed for sanitizing filenames in restricted mode
98 ACCENT_CHARS
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
99 itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
100 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
130 '%Y-%m-%d %H:%M:%S.%f',
131 '%Y-%m-%d %H:%M:%S:%f',
134 '%Y-%m-%dT%H:%M:%SZ',
135 '%Y-%m-%dT%H:%M:%S.%fZ',
136 '%Y-%m-%dT%H:%M:%S.%f0Z',
138 '%Y-%m-%dT%H:%M:%S.%f',
141 '%b %d %Y at %H:%M:%S',
143 '%B %d %Y at %H:%M:%S',
147 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
148 DATE_FORMATS_DAY_FIRST
.extend([
159 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
160 DATE_FORMATS_MONTH_FIRST
.extend([
168 PACKED_CODES_RE
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
169 JSON_LD_RE
= r
'(?is)<script[^>]+type=(["\']?
)application
/ld\
+json\
1[^
>]*>\s
*(?P
<json_ld
>{.+?}|\
[.+?\
])\s
*</script
>'
171 NUMBER_RE = r'\d
+(?
:\
.\d
+)?
'
175 def preferredencoding():
176 """Get preferred encoding.
178 Returns the best encoding scheme for the system, based on
179 locale.getpreferredencoding() and some further tweaks.
182 pref = locale.getpreferredencoding()
190 def write_json_file(obj, fn):
191 """ Encode obj as JSON and write it to fn, atomically if possible """
193 tf = tempfile.NamedTemporaryFile(
194 prefix=f'{os.path.basename(fn)}
.', dir=os.path.dirname(fn),
195 suffix='.tmp
', delete=False, mode='w
', encoding='utf
-8')
199 json.dump(obj, tf, ensure_ascii=False)
200 if sys.platform == 'win32
':
201 # Need to remove existing file on Windows, else os.rename raises
202 # WindowsError or FileExistsError.
203 with contextlib.suppress(OSError):
205 with contextlib.suppress(OSError):
208 os.chmod(tf.name, 0o666 & ~mask)
209 os.rename(tf.name, fn)
211 with contextlib.suppress(OSError):
def find_xpath_attr(node, xpath, key, val=None):
    """Find the first element matching xpath[@key] or xpath[@key=val].

    @param node   Element (or tree) to search; must provide find()/iterfind()
    @param xpath  XPath expression selecting the candidate elements
    @param key    Attribute name; restricted to letters, '_' and '-'
    @param val    Required attribute value. If None, any element that merely
                  has the attribute matches
    @returns      The first matching element, or None if nothing matches
    """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    if val is None:
        return node.find(f'{xpath}[@{key}]')

    val = str(val)
    if "'" not in val:
        return node.find(f"{xpath}[@{key}='{val}']")

    # A single quote cannot be embedded in a quoted XPath literal, so the
    # predicate would be malformed. Select on the attribute's presence and
    # compare the value in Python instead
    return next(
        (element for element in node.iterfind(f'{xpath}[@{key}]') if element.get(key) == val),
        None)
222 # On python2.6 the xml.etree.ElementTree.Element methods don't support
223 # the namespace parameter
226 def xpath_with_ns(path
, ns_map
):
227 components
= [c
.split(':') for c
in path
.split('/')]
231 replaced
.append(c
[0])
234 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
235 return '/'.join(replaced
)
238 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
239 def _find_xpath(xpath
):
240 return node
.find(xpath
)
242 if isinstance(xpath
, str):
243 n
= _find_xpath(xpath
)
251 if default
is not NO_DEFAULT
:
254 name
= xpath
if name
is None else name
255 raise ExtractorError('Could not find XML element %s' % name
)
261 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
262 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
263 if n
is None or n
== default
:
266 if default
is not NO_DEFAULT
:
269 name
= xpath
if name
is None else name
270 raise ExtractorError('Could not find XML element\'s text %s' % name
)
276 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
277 n
= find_xpath_attr(node
, xpath
, key
)
279 if default
is not NO_DEFAULT
:
282 name
= f
'{xpath}[@{key}]' if name
is None else name
283 raise ExtractorError('Could not find XML attribute %s' % name
)
def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag whose ``id`` attribute equals the given ID.

    Delegates to get_element_by_attribute(); any extra keyword arguments are
    passed through to it unchanged.
    """
    element_id = id
    return get_element_by_attribute('id', element_id, html, **kwargs)
def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag whose ``id`` attribute equals the given ID.

    Delegates to get_element_html_by_attribute(); any extra keyword arguments
    are passed through to it unchanged.
    """
    element_id = id
    return get_element_html_by_attribute('id', element_id, html, **kwargs)
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying the given class, or None if there is none"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_html_by_class(class_name, html):
    """Return the html of the first tag carrying the given class, or None if there is none"""
    matches = get_elements_html_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag whose attribute has the given value, or None.

    Extra keyword arguments are forwarded to get_elements_by_attribute().
    """
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    if matches:
        return matches[0]
    return None
def get_element_html_by_attribute(attribute, value, html, **kargs):
    """Return the html of the first tag whose attribute has the given value, or None.

    Extra keyword arguments are forwarded to get_elements_html_by_attribute().
    """
    matches = get_elements_html_by_attribute(attribute, value, html, **kargs)
    if matches:
        return matches[0]
    return None
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list

    @param class_name  Class to look for; matched as one whole entry of the
                       tag's class attribute
    @param html        HTML document to search
    @param kargs       Forwarded to get_elements_by_attribute() (previously
                       accepted but silently ignored)
    """
    # The class may appear anywhere in the attribute, delimited by the quotes or by whitespace
    class_pattern = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    # The value is already a regex, so escaping must stay off whatever the caller passed
    return get_elements_by_attribute(
        'class', class_pattern, html, **{**kargs, 'escape_value': False})
def get_elements_html_by_class(class_name, html):
    """Return a list with the html of every tag carrying the given class in the passed HTML document"""
    # The class may appear anywhere in the attribute, delimited by the quotes or by whitespace
    class_pattern = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    # The value is already a regex, so it must not be escaped again
    return get_elements_html_by_attribute('class', class_pattern, html, escape_value=False)
def get_elements_by_attribute(*args, **kwargs):
    """Return a list with the content of every tag matching the specified attribute.

    All arguments are passed to get_elements_text_and_html_by_attribute().
    """
    contents = []
    for text, _whole in get_elements_text_and_html_by_attribute(*args, **kwargs):
        contents.append(text)
    return contents
def get_elements_html_by_attribute(*args, **kwargs):
    """Return a list with the html of every tag matching the specified attribute.

    All arguments are passed to get_elements_text_and_html_by_attribute().
    """
    elements = []
    for _text, whole in get_elements_text_and_html_by_attribute(*args, **kwargs):
        elements.append(whole)
    return elements
345 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w
:.-]+', escape_value=True):
347 Return the text (content) and the html (whole) of the tag with the specified
348 attribute in the passed HTML document
353 quote = '' if re.match(r'''[\s"'`
=<>]''', value) else '?'
355 value = re.escape(value) if escape_value else value
357 partial_element_re = rf'''(?x
)
359 (?
:\
s(?
:[^
>"']|"[^
"]*"|
'[^']*')*)?
360 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
363 for m in re.finditer(partial_element_re, html):
364 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
367 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P
<content
>.*)(?P
=q
)$
', r'\g
<content
>', content, flags=re.DOTALL)),
372 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
374 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
375 closing tag for the first opening tag it has encountered, and can be used
379 class HTMLBreakOnClosingTagException(Exception):
383 self.tagstack = collections.deque()
384 html.parser.HTMLParser.__init__(self)
389 def __exit__(self, *_):
393 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
394 # so data remains buffered; we no longer have any interest in it, thus
395 # override this method to discard it
398 def handle_starttag(self, tag, _):
399 self.tagstack.append(tag)
401 def handle_endtag(self, tag):
402 if not self.tagstack:
403 raise compat_HTMLParseError('no tags
in the stack
')
405 inner_tag = self.tagstack.pop()
409 raise compat_HTMLParseError(f'matching opening tag
for closing {tag} tag
not found
')
410 if not self.tagstack:
411 raise self.HTMLBreakOnClosingTagException()
414 # XXX: This should be far less strict
415 def get_element_text_and_html_by_tag(tag, html):
417 For the first element with the specified tag in the passed HTML document
418 return its' content (text
) and the whole
element (html
)
420 def find_or_raise(haystack, needle, exc):
422 return haystack.index(needle)
425 closing_tag = f'</{tag}>'
426 whole_start = find_or_raise(
427 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
428 content_start = find_or_raise(
429 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
430 content_start += whole_start + 1
431 with HTMLBreakOnClosingTagParser() as parser:
432 parser.feed(html[whole_start:content_start])
433 if not parser.tagstack or parser.tagstack[0] != tag:
434 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
435 offset = content_start
436 while offset < len(html):
437 next_closing_tag_start = find_or_raise(
438 html[offset:], closing_tag,
439 compat_HTMLParseError(f'closing {tag} tag not found'))
440 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
442 parser.feed(html[offset:offset + next_closing_tag_end])
443 offset += next_closing_tag_end
444 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
445 return html[content_start:offset + next_closing_tag_start], \
446 html[whole_start:offset + next_closing_tag_end]
447 raise compat_HTMLParseError('unexpected end of html')
450 class HTMLAttributeParser(html.parser.HTMLParser):
451 """Trivial HTML parser to gather the attributes
for a single element
"""
455 html.parser.HTMLParser.__init__(self)
457 def handle_starttag(self, tag, attrs):
458 self.attrs = dict(attrs)
459 raise compat_HTMLParseError('done')
462 class HTMLListAttrsParser(html.parser.HTMLParser):
463 """HTML parser to gather the attributes
for the elements of a
list"""
466 html.parser.HTMLParser.__init__(self)
470 def handle_starttag(self, tag, attrs):
471 if tag == 'li' and self._level == 0:
472 self.items.append(dict(attrs))
475 def handle_endtag(self, tag):
479 def extract_attributes(html_element):
480 """Given a string
for an HTML element such
as
482 a
="foo" B
="bar" c
="&98;az" d
=boz
483 empty
= noval entity
="&"
486 Decode
and return a dictionary of attributes
.
488 'a': 'foo', 'b': 'bar', c
: 'baz', d
: 'boz',
489 'empty': '', 'noval': None, 'entity': '&',
490 'sq': '"', 'dq': '\''
493 parser = HTMLAttributeParser()
494 with contextlib.suppress(compat_HTMLParseError):
495 parser.feed(html_element)
500 def parse_list(webpage):
501 """Given a string
for an series of HTML
<li
> elements
,
502 return a dictionary of their attributes
"""
503 parser = HTMLListAttrsParser()
509 def clean_html(html):
510 """Clean an HTML snippet into a readable string
"""
512 if html is None: # Convenience for sanitizing descriptions etc.
515 html = re.sub(r'\s+', ' ', html)
516 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
517 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
519 html = re.sub('<.*?>', '', html)
520 # Replace html entities
521 html = unescapeHTML(html)
525 class LenientJSONDecoder(json.JSONDecoder):
527 def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
528 self.transform_source, self.ignore_extra = transform_source, ignore_extra
529 self._close_attempts = 2 * close_objects
530 super().__init__(*args, **kwargs)
533 def _close_object(err):
534 doc = err.doc[:err.pos]
535 # We need to add comma first to get the correct error message
536 if err.msg.startswith('Expecting \',\''):
538 elif not doc.endswith(','):
541 if err.msg.startswith('Expecting property name'):
542 return doc[:-1] + '}'
543 elif err.msg.startswith('Expecting value'):
544 return doc[:-1] + ']'
547 if self.transform_source:
548 s = self.transform_source(s)
549 for attempt in range(self._close_attempts + 1):
551 if self.ignore_extra:
552 return self.raw_decode(s.lstrip())[0]
553 return super().decode(s)
554 except json.JSONDecodeError as e:
557 elif attempt < self._close_attempts:
558 s = self._close_object(e)
561 raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
562 assert False, 'Too many attempts to decode JSON'
565 def sanitize_open(filename, open_mode):
566 """Try to
open the given filename
, and slightly tweak it
if this fails
.
568 Attempts to
open the given filename
. If this fails
, it tries to change
569 the filename slightly
, step by step
, until it
's either able to open it
570 or it fails and raises a final exception, like the standard open()
573 It returns the tuple (stream, definitive_file_name).
576 if sys.platform == 'win32
':
579 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
580 with contextlib.suppress(io.UnsupportedOperation):
581 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
582 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
584 for attempt in range(2):
587 if sys.platform == 'win32
':
588 # FIXME: An exclusive lock also locks the file from being read.
589 # Since windows locks are mandatory, don't lock the
file on
windows (for now
).
590 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
591 raise LockingUnsupportedError()
592 stream
= locked_file(filename
, open_mode
, block
=False).__enter
__()
594 stream
= open(filename
, open_mode
)
595 return stream
, filename
596 except OSError as err
:
597 if attempt
or err
.errno
in (errno
.EACCES
,):
599 old_filename
, filename
= filename
, sanitize_path(filename
)
600 if old_filename
== filename
:
604 def timeconvert(timestr
):
605 """Convert RFC 2822 defined time string into system timestamp"""
607 timetuple
= email
.utils
.parsedate_tz(timestr
)
608 if timetuple
is not None:
609 timestamp
= email
.utils
.mktime_tz(timetuple
)
613 def sanitize_filename(s
, restricted
=False, is_id
=NO_DEFAULT
):
614 """Sanitizes a string so it could be used as part of a filename.
615 @param restricted Use a stricter subset of allowed characters
616 @param is_id Whether this is an ID that should be kept unchanged if possible.
617 If unset, yt-dlp's new sanitization rules are in effect
622 def replace_insane(char
):
623 if restricted
and char
in ACCENT_CHARS
:
624 return ACCENT_CHARS
[char
]
625 elif not restricted
and char
== '\n':
627 elif is_id
is NO_DEFAULT
and not restricted
and char
in '"*:<>?|/\\':
628 # Replace with their full-width unicode counterparts
629 return {'/': '\u29F8', '\\': '\u29f9'}
.get(char
, chr(ord(char
) + 0xfee0))
630 elif char
== '?' or ord(char
) < 32 or ord(char
) == 127:
633 return '' if restricted
else '\''
635 return '\0_\0-' if restricted
else '\0 \0-'
636 elif char
in '\\/|*<>':
638 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace() or ord(char
) > 127):
639 return '' if unicodedata
.category(char
)[0] in 'CM' else '\0_'
642 # Replace look-alike Unicode glyphs
643 if restricted
and (is_id
is NO_DEFAULT
or not is_id
):
644 s
= unicodedata
.normalize('NFKC', s
)
645 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) # Handle timestamps
646 result
= ''.join(map(replace_insane
, s
))
647 if is_id
is NO_DEFAULT
:
648 result
= re
.sub(r
'(\0.)(?:(?=\1)..)+', r
'\1', result
) # Remove repeated substitute chars
649 STRIP_RE
= r
'(?:\0.|[ _-])*'
650 result
= re
.sub(f
'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result
) # Remove substitute chars from start/end
651 result
= result
.replace('\0', '') or '_'
654 while '__' in result
:
655 result
= result
.replace('__', '_')
656 result
= result
.strip('_')
657 # Common case of "Foreign band name - English song title"
658 if restricted
and result
.startswith('-_'):
660 if result
.startswith('-'):
661 result
= '_' + result
[len('-'):]
662 result
= result
.lstrip('.')
668 def sanitize_path(s
, force
=False):
669 """Sanitizes and normalizes path on Windows"""
670 # XXX: this handles drive relative paths (c:sth) incorrectly
671 if sys
.platform
== 'win32':
673 drive_or_unc
, _
= os
.path
.splitdrive(s
)
679 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
683 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
684 for path_part
in norm_path
]
686 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
687 elif force
and s
and s
[0] == os
.path
.sep
:
688 sanitized_path
.insert(0, os
.path
.sep
)
689 # TODO: Fix behavioral differences <3.12
690 # The workaround using `normpath` only superficially passes tests
691 # Ref: https://github.com/python/cpython/pull/100351
692 return os
.path
.normpath(os
.path
.join(*sanitized_path
))
695 def sanitize_url(url
, *, scheme
='http'):
696 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
697 # the number of unwanted failures due to missing protocol
700 elif url
.startswith('//'):
701 return f
'{scheme}:{url}'
702 # Fix some common typos seen so far
704 # https://github.com/ytdl-org/youtube-dl/issues/15649
705 (r
'^httpss://', r
'https://'),
706 # https://bx1.be/lives/direct-tv/
707 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
709 for mistake
, fixup
in COMMON_TYPOS
:
710 if re
.match(mistake
, url
):
711 return re
.sub(mistake
, fixup
, url
)
715 def extract_basic_auth(url
):
716 parts
= urllib
.parse
.urlsplit(url
)
717 if parts
.username
is None:
719 url
= urllib
.parse
.urlunsplit(parts
._replace
(netloc
=(
720 parts
.hostname
if parts
.port
is None
721 else '%s:%d' % (parts
.hostname
, parts
.port
))))
722 auth_payload
= base64
.b64encode(
723 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode())
724 return url
, f
'Basic {auth_payload.decode()}'
728 """Expand shell variables and ~"""
729 return os
.path
.expandvars(compat_expanduser(s
))
732 def orderedSet(iterable
, *, lazy
=False):
733 """Remove all duplicates from the input iterable"""
735 seen
= [] # Do not use set since the items can be unhashable
741 return _iter() if lazy
else list(_iter())
744 def _htmlentity_transform(entity_with_semicolon
):
745 """Transforms an HTML entity to a character."""
746 entity
= entity_with_semicolon
[:-1]
748 # Known non-numeric HTML entity
749 if entity
in html
.entities
.name2codepoint
:
750 return chr(html
.entities
.name2codepoint
[entity
])
752 # TODO: HTML5 allows entities without a semicolon.
753 # E.g. '&Eacuteric' should be decoded as 'Éric'.
754 if entity_with_semicolon
in html
.entities
.html5
:
755 return html
.entities
.html5
[entity_with_semicolon
]
757 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
759 numstr
= mobj
.group(1)
760 if numstr
.startswith('x'):
762 numstr
= '0%s' % numstr
765 # See https://github.com/ytdl-org/youtube-dl/issues/7518
766 with contextlib
.suppress(ValueError):
767 return chr(int(numstr
, base
))
769 # Unknown entity in name, return its literal representation
770 return '&%s;' % entity
776 assert isinstance(s
, str)
779 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
782 def escapeHTML(text
):
785 .replace('&', '&')
786 .replace('<', '<')
787 .replace('>', '>')
788 .replace('"', '"')
789 .replace("'", ''')
class netrc_from_content(netrc.netrc):
    """netrc reader that parses its entries from a string instead of a file"""

    def __init__(self, content):
        # The base-class __init__ is not invoked; the tables are created here
        # and filled by parsing the in-memory text
        self.hosts = {}
        self.macros = {}
        buffer = io.StringIO(content)
        try:
            # '-' is passed as the file name argument of the parser
            self._parse('-', buffer, False)
        finally:
            buffer.close()
800 class Popen(subprocess
.Popen
):
801 if sys
.platform
== 'win32':
802 _startupinfo
= subprocess
.STARTUPINFO()
803 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
808 def _fix_pyinstaller_ld_path(env
):
809 """Restore LD_LIBRARY_PATH when using PyInstaller
810 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
811 https://github.com/yt-dlp/yt-dlp/issues/4573
813 if not hasattr(sys
, '_MEIPASS'):
817 orig
= env
.get(f
'{key}_ORIG')
823 _fix('LD_LIBRARY_PATH') # Linux
824 _fix('DYLD_LIBRARY_PATH') # macOS
826 def __init__(self
, args
, *remaining
, env
=None, text
=False, shell
=False, **kwargs
):
828 env
= os
.environ
.copy()
829 self
._fix
_pyinstaller
_ld
_path
(env
)
831 self
.__text
_mode
= kwargs
.get('encoding') or kwargs
.get('errors') or text
or kwargs
.get('universal_newlines')
833 kwargs
['universal_newlines'] = True # For 3.6 compatibility
834 kwargs
.setdefault('encoding', 'utf-8')
835 kwargs
.setdefault('errors', 'replace')
837 if shell
and compat_os_name
== 'nt' and kwargs
.get('executable') is None:
838 if not isinstance(args
, str):
839 args
= ' '.join(compat_shlex_quote(a
) for a
in args
)
841 args
= f
'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
843 super().__init
__(args
, *remaining
, env
=env
, shell
=shell
, **kwargs
, startupinfo
=self
._startupinfo
)
846 comspec
= os
.environ
.get('ComSpec') or os
.path
.join(
847 os
.environ
.get('SystemRoot', ''), 'System32', 'cmd.exe')
848 if os
.path
.isabs(comspec
):
850 raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
852 def communicate_or_kill(self
, *args
, **kwargs
):
854 return self
.communicate(*args
, **kwargs
)
855 except BaseException
: # Including KeyboardInterrupt
856 self
.kill(timeout
=None)
859 def kill(self
, *, timeout
=0):
862 self
.wait(timeout
=timeout
)
865 def run(cls
, *args
, timeout
=None, **kwargs
):
866 with cls(*args
, **kwargs
) as proc
:
867 default
= '' if proc
.__text
_mode
else b
''
868 stdout
, stderr
= proc
.communicate_or_kill(timeout
=timeout
)
869 return stdout
or default
, stderr
or default
, proc
.returncode
def encodeArgument(s):
    """Return the argument as str, decoding it as ASCII if it is not a str already"""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')
# Result type of timetuple_from_msec()
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into (hours, minutes, seconds, milliseconds)"""
    total_seconds, milliseconds = divmod(msec, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return _timetuple(hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
889 def formatSeconds(secs
, delim
=':', msec
=False):
890 time
= timetuple_from_msec(secs
* 1000)
892 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
894 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
896 ret
= '%d' % time
.seconds
897 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
900 def bug_reports_message(before
=';'):
901 from ..update
import REPOSITORY
903 msg
= (f
'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
904 'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
906 before
= before
.rstrip()
907 if not before
or before
.endswith(('.', '!', '?')):
908 msg
= msg
[0].title() + msg
[1:]
910 return (before
+ ' ' if before
else '') + msg
913 class YoutubeDLError(Exception):
914 """Base exception for YoutubeDL errors."""
917 def __init__(self
, msg
=None):
920 elif self
.msg
is None:
921 self
.msg
= type(self
).__name
__
922 super().__init
__(self
.msg
)
925 class ExtractorError(YoutubeDLError
):
926 """Error during info extraction."""
928 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
929 """ tb, if given, is the original traceback (so that it can be printed out).
930 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
932 from ..networking
.exceptions
import network_exceptions
933 if sys
.exc_info()[0] in network_exceptions
:
936 self
.orig_msg
= str(msg
)
938 self
.expected
= expected
940 self
.video_id
= video_id
942 self
.exc_info
= sys
.exc_info() # preserve original exception
943 if isinstance(self
.exc_info
[1], ExtractorError
):
944 self
.exc_info
= self
.exc_info
[1].exc_info
945 super().__init
__(self
.__msg
)
950 format_field(self
.ie
, None, '[%s] '),
951 format_field(self
.video_id
, None, '%s: '),
953 format_field(self
.cause
, None, ' (caused by %r)'),
954 '' if self
.expected
else bug_reports_message()))
956 def format_traceback(self
):
957 return join_nonempty(
958 self
.traceback
and ''.join(traceback
.format_tb(self
.traceback
)),
959 self
.cause
and ''.join(traceback
.format_exception(None, self
.cause
, self
.cause
.__traceback
__)[1:]),
962 def __setattr__(self
, name
, value
):
963 super().__setattr
__(name
, value
)
964 if getattr(self
, 'msg', None) and name
not in ('msg', 'args'):
965 self
.msg
= self
.__msg
or type(self
).__name
__
966 self
.args
= (self
.msg
, ) # Cannot be property
969 class UnsupportedError(ExtractorError
):
970 def __init__(self
, url
):
972 'Unsupported URL: %s' % url
, expected
=True)
976 class RegexNotFoundError(ExtractorError
):
977 """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Always reported as an expected error, whatever the caller passed
        super().__init__(msg, **{**kwargs, 'expected': True})
        # NOTE(review): presumably the countries relevant to the restriction - confirm with callers
        self.countries = countries
class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        # Fall back to a generic message when none (or an empty one) is given
        if not msg:
            msg = 'The channel is not currently live'
        # Always reported as an expected error, whatever the caller passed
        super().__init__(msg, **{**kwargs, 'expected': True})
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors.
    """

    def __init__(self, msg, exc_info=None):
        """
        @param msg       Error message
        @param exc_info  If given, the original exception that caused the
                         trouble (as returned by sys.exc_info())
        """
        super().__init__(msg)
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry cannot be found in the
    playlist info_dict.
    """
    # Default message
    msg = 'Entry not found in info'
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
    # Default message; the file name is appended to it when one is given
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Assigning creates an instance attribute; the class-level default is left untouched
            self.msg = f'{self.msg}: {filename}'
        super().__init__(self.msg)
1039 class PostProcessingError(YoutubeDLError
):
1040 """Post Processing exception.
1042 This exception may be raised by PostProcessor's .run() method to
1043 indicate an error in the postprocessing task.
class DownloadCancelled(YoutubeDLError):
    """Raised when the download queue should be interrupted"""
    # Default message
    msg = 'The download was cancelled'
class ExistingVideoReached(DownloadCancelled):
    """Cancellation triggered by --break-on-existing"""
    # Default message
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """Cancellation triggered by --break-match-filter"""
    # Default message
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
class MaxDownloadsReached(DownloadCancelled):
    """Cancellation triggered when the --max-downloads limit has been reached"""
    # Default message
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ReExtractInfo(YoutubeDLError):
    """Signals that the video info needs to be re-extracted"""

    def __init__(self, msg, expected=False):
        """
        @param msg       Error message
        @param expected  Stored as-is on the instance as `expected`
        """
        super().__init__(msg)
        self.expected = expected
1075 class ThrottledDownload(ReExtractInfo
):
1076 """ Download speed below --throttled-rate. """
1077 msg
= 'The download speed is below throttle limit'
1080 super().__init
__(self
.msg
, expected
=False)
1083 class UnavailableVideoError(YoutubeDLError
):
1084 """Unavailable Format exception.
1086 This exception will be thrown when a video is requested
1087 in a format that is not available for that video.
1089 msg
= 'Unable to download video'
1091 def __init__(self
, err
=None):
1093 self
.msg
+= f
': {err}'
1094 super().__init
__(self
.msg
)
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = f'Downloaded {downloaded} bytes, expected {expected} bytes'
        super().__init__(message)
        # Both sizes are reported as bytes in the message above
        self.downloaded = downloaded
        self.expected = expected
1112 class XAttrMetadataError(YoutubeDLError
):
1113 def __init__(self
, code
=None, msg
='Unknown error'):
1114 super().__init
__(msg
)
1118 # Parsing code and msg
1119 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
1120 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
1121 self
.reason
= 'NO_SPACE'
1122 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
1123 self
.reason
= 'VALUE_TOO_LONG'
1125 self
.reason
= 'NOT_SUPPORTED'
1128 class XAttrUnavailableError(YoutubeDLError
):
def is_path_like(f):
    """Whether the object is a str, bytes or os.PathLike instance"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1136 def extract_timezone(date_str
):
1139 ^.{8,}? # >=8 char non-TZ prefix, if present
1140 (?P<tz>Z| # just the UTC Z, or
1141 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1142 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1143 [ ]? # optional space
1144 (?P<sign>\+|-) # +/-
1145 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1149 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1150 timezone
= TIMEZONE_NAMES
.get(m
and m
.group('tz').strip())
1151 if timezone
is not None:
1152 date_str
= date_str
[:-len(m
.group('tz'))]
1153 timezone
= datetime
.timedelta(hours
=timezone
or 0)
1155 date_str
= date_str
[:-len(m
.group('tz'))]
1156 if not m
.group('sign'):
1157 timezone
= datetime
.timedelta()
1159 sign
= 1 if m
.group('sign') == '+' else -1
1160 timezone
= datetime
.timedelta(
1161 hours
=sign
* int(m
.group('hours')),
1162 minutes
=sign
* int(m
.group('minutes')))
1163 return timezone
, date_str
1166 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
1167 """ Return a UNIX timestamp from the given date """
1169 if date_str
is None:
1172 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
1174 if timezone
is None:
1175 timezone
, date_str
= extract_timezone(date_str
)
1177 with contextlib
.suppress(ValueError):
1178 date_format
= f
'%Y-%m-%d{delimiter}%H:%M:%S'
1179 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
1180 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Return the day-first date format list if day_first is truthy, else the month-first one"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1187 def unified_strdate(date_str
, day_first
=True):
1188 """Return a string with the date in the format YYYYMMDD"""
1190 if date_str
is None:
1194 date_str
= date_str
.replace(',', ' ')
1195 # Remove AM/PM + timezone
1196 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
1197 _
, date_str
= extract_timezone(date_str
)
1199 for expression
in date_formats(day_first
):
1200 with contextlib
.suppress(ValueError):
1201 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
1202 if upload_date
is None:
1203 timetuple
= email
.utils
.parsedate_tz(date_str
)
1205 with contextlib
.suppress(ValueError):
1206 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
1207 if upload_date
is not None:
1208 return str(upload_date
)
1211 def unified_timestamp(date_str
, day_first
=True):
1212 if not isinstance(date_str
, str):
1215 date_str
= re
.sub(r
'\s+', ' ', re
.sub(
1216 r
'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str
))
1218 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
1219 timezone
, date_str
= extract_timezone(date_str
)
1221 # Remove AM/PM + timezone
1222 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
1224 # Remove unrecognized timezones from ISO 8601 alike timestamps
1225 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1227 date_str
= date_str
[:-len(m
.group('tz'))]
1229 # Python only supports microseconds, so remove nanoseconds
1230 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
1232 date_str
= m
.group(1)
1234 for expression
in date_formats(day_first
):
1235 with contextlib
.suppress(ValueError):
1236 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
1237 return calendar
.timegm(dt
.timetuple())
1239 timetuple
= email
.utils
.parsedate_tz(date_str
)
1241 return calendar
.timegm(timetuple
) + pm_delta
* 3600 - timezone
.total_seconds()
1244 def determine_ext(url
, default_ext
='unknown_video'):
1245 if url
is None or '.' not in url
:
1247 guess
= url
.partition('?')[0].rpartition('.')[2]
1248 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
1250 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1251 elif guess
.rstrip('/') in KNOWN_EXTENSIONS
:
1252 return guess
.rstrip('/')
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by replacing the extension of `filename` with "<sub_lang>.<sub_format>"

    `expected_real_ext` is passed through unchanged to replace_extension.
    """
    sub_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, sub_ext, expected_real_ext)
1261 def datetime_from_str(date_str
, precision
='auto', format
='%Y%m%d'):
1263 Return a datetime object from a string.
1265 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1267 @param format strftime format of DATE
1268 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1269 auto: round to the unit provided in date_str (if applicable).
1271 auto_precision
= False
1272 if precision
== 'auto':
1273 auto_precision
= True
1274 precision
= 'microsecond'
1275 today
= datetime_round(datetime
.datetime
.now(datetime
.timezone
.utc
), precision
)
1276 if date_str
in ('now', 'today'):
1278 if date_str
== 'yesterday':
1279 return today
- datetime
.timedelta(days
=1)
1281 r
'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1283 if match
is not None:
1284 start_time
= datetime_from_str(match
.group('start'), precision
, format
)
1285 time
= int(match
.group('time')) * (-1 if match
.group('sign') == '-' else 1)
1286 unit
= match
.group('unit')
1287 if unit
== 'month' or unit
== 'year':
1288 new_date
= datetime_add_months(start_time
, time
* 12 if unit
== 'year' else time
)
1294 delta
= datetime
.timedelta(**{unit + 's': time}
)
1295 new_date
= start_time
+ delta
1297 return datetime_round(new_date
, unit
)
1300 return datetime_round(datetime
.datetime
.strptime(date_str
, format
), precision
)
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param format  Forwarded as the `format` argument of datetime_from_str
    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
                   A ValueError is raised when `date_str` does not fully match
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if not re.fullmatch(allowed, date_str):
            raise ValueError(f'Invalid date format "{date_str}"')
    # Microsecond precision means datetime_from_str is asked not to round before we take the date part
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month
    (e.g. Jan 31 + 1 month gives the last day of February).
    """
    # Work with a zero-based month index so that floor division carries into the year
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    year = dt.year + year_shift
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]
    return dt.replace(year, month, min(dt.day, last_day))
1324 def datetime_round(dt
, precision
='day'):
1326 Round a datetime object's time to a specific precision
1328 if precision
== 'microsecond':
1337 roundto
= lambda x
, n
: ((x
+ n
/ 2) // n
) * n
1338 timestamp
= roundto(calendar
.timegm(dt
.timetuple()), unit_seconds
[precision
])
1339 return datetime
.datetime
.fromtimestamp(timestamp
, datetime
.timezone
.utc
)
1342 def hyphenate_date(date_str
):
1344 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1345 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
1346 if match
is not None:
1347 return '-'.join(match
.groups())
1353 """Represents a time interval between two dates"""
1355 def __init__(self
, start
=None, end
=None):
1356 """start and end must be strings in the format accepted by date"""
1357 if start
is not None:
1358 self
.start
= date_from_str(start
, strict
=True)
1360 self
.start
= datetime
.datetime
.min.date()
1362 self
.end
= date_from_str(end
, strict
=True)
1364 self
.end
= datetime
.datetime
.max.date()
1365 if self
.start
> self
.end
:
1366 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
1370 """Returns a range that only contains the given day"""
1371 return cls(day
, day
)
1373 def __contains__(self
, date
):
1374 """Check if the date is in the range"""
1375 if not isinstance(date
, datetime
.date
):
1376 date
= date_from_str(date
)
1377 return self
.start
<= date
<= self
.end
1380 return f
'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'
1383 return f
'{self.start} to {self.end}'
1385 def __eq__(self
, other
):
1386 return (isinstance(other
, DateRange
)
1387 and self
.start
== other
.start
and self
.end
== other
.end
)
1391 def system_identifier():
1392 python_implementation
= platform
.python_implementation()
1393 if python_implementation
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
1394 python_implementation
+= ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
1396 with contextlib
.suppress(OSError): # We may not have access to the executable
1397 libc_ver
= platform
.libc_ver()
1399 return 'Python %s (%s %s %s) - %s (%s%s)' % (
1400 platform
.python_version(),
1401 python_implementation
,
1403 platform
.architecture()[0],
1404 platform
.platform(),
1405 ssl
.OPENSSL_VERSION
,
1406 format_field(join_nonempty(*libc_ver
, delim
=' '), None, ', %s'),
1411 def get_windows_version():
1412 ''' Get Windows version. returns () if it's not running on Windows '''
1413 if compat_os_name
== 'nt':
1414 return version_tuple(platform
.win32_ver()[1])
1419 def write_string(s
, out
=None, encoding
=None):
1420 assert isinstance(s
, str)
1421 out
= out
or sys
.stderr
1422 # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
1426 if compat_os_name
== 'nt' and supports_terminal_sequences(out
):
1427 s
= re
.sub(r
'([\r\n]+)', r
' \1', s
)
1429 enc
, buffer = None, out
1430 # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
1431 if 'b' in (getattr(out
, 'mode', None) or ''):
1432 enc
= encoding
or preferredencoding()
1433 elif hasattr(out
, 'buffer'):
1435 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
1437 buffer.write(s
.encode(enc
, 'ignore') if enc
else s
)
1441 # TODO: Use global logger
1442 def deprecation_warning(msg
, *, printer
=None, stacklevel
=0, **kwargs
):
1443 from .. import _IN_CLI
1445 if msg
in deprecation_warning
._cache
:
1447 deprecation_warning
._cache
.add(msg
)
1449 return printer(f
'{msg}{bug_reports_message()}', **kwargs
)
1450 return write_string(f
'ERROR: {msg}{bug_reports_message()}\n', **kwargs
)
1453 warnings
.warn(DeprecationWarning(msg
), stacklevel
=stacklevel
+ 3)
1456 deprecation_warning
._cache
= set()
1459 def bytes_to_intlist(bs
):
1462 if isinstance(bs
[0], int): # Python 3
1465 return [ord(c
) for c
in bs
]
1468 def intlist_to_bytes(xs
):
1471 return struct
.pack('%dB' % len(xs
), *xs
)
1474 class LockingUnsupportedError(OSError):
1475 msg
= 'File locking is not supported'
1478 super().__init
__(self
.msg
)
1481 # Cross-platform file locking
1482 if sys
.platform
== 'win32':
1484 import ctypes
.wintypes
1487 class OVERLAPPED(ctypes
.Structure
):
1489 ('Internal', ctypes
.wintypes
.LPVOID
),
1490 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
1491 ('Offset', ctypes
.wintypes
.DWORD
),
1492 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
1493 ('hEvent', ctypes
.wintypes
.HANDLE
),
1496 kernel32
= ctypes
.WinDLL('kernel32')
1497 LockFileEx
= kernel32
.LockFileEx
1498 LockFileEx
.argtypes
= [
1499 ctypes
.wintypes
.HANDLE
, # hFile
1500 ctypes
.wintypes
.DWORD
, # dwFlags
1501 ctypes
.wintypes
.DWORD
, # dwReserved
1502 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1503 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1504 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1506 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
1507 UnlockFileEx
= kernel32
.UnlockFileEx
1508 UnlockFileEx
.argtypes
= [
1509 ctypes
.wintypes
.HANDLE
, # hFile
1510 ctypes
.wintypes
.DWORD
, # dwReserved
1511 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1512 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1513 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1515 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
1516 whole_low
= 0xffffffff
1517 whole_high
= 0x7fffffff
1519 def _lock_file(f
, exclusive
, block
):
1520 overlapped
= OVERLAPPED()
1521 overlapped
.Offset
= 0
1522 overlapped
.OffsetHigh
= 0
1523 overlapped
.hEvent
= 0
1524 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
1526 if not LockFileEx(msvcrt
.get_osfhandle(f
.fileno()),
1527 (0x2 if exclusive
else 0x0) |
(0x0 if block
else 0x1),
1528 0, whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1529 # NB: No argument form of "ctypes.FormatError" does not work on PyPy
1530 raise BlockingIOError(f
'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
1532 def _unlock_file(f
):
1533 assert f
._lock
_file
_overlapped
_p
1534 handle
= msvcrt
.get_osfhandle(f
.fileno())
1535 if not UnlockFileEx(handle
, 0, whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1536 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
1542 def _lock_file(f
, exclusive
, block
):
1543 flags
= fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
1545 flags |
= fcntl
.LOCK_NB
1547 fcntl
.flock(f
, flags
)
1548 except BlockingIOError
:
1550 except OSError: # AOSP does not have flock()
1551 fcntl
.lockf(f
, flags
)
1553 def _unlock_file(f
):
1554 with contextlib
.suppress(OSError):
1555 return fcntl
.flock(f
, fcntl
.LOCK_UN
)
1556 with contextlib
.suppress(OSError):
1557 return fcntl
.lockf(f
, fcntl
.LOCK_UN
) # AOSP does not have flock()
1558 return fcntl
.flock(f
, fcntl
.LOCK_UN | fcntl
.LOCK_NB
) # virtiofs needs LOCK_NB on unlocking
1562 def _lock_file(f
, exclusive
, block
):
1563 raise LockingUnsupportedError()
1565 def _unlock_file(f
):
1566 raise LockingUnsupportedError()
1572 def __init__(self
, filename
, mode
, block
=True, encoding
=None):
1573 if mode
not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}
:
1574 raise NotImplementedError(mode
)
1575 self
.mode
, self
.block
= mode
, block
1577 writable
= any(f
in mode
for f
in 'wax+')
1578 readable
= any(f
in mode
for f
in 'r+')
1579 flags
= functools
.reduce(operator
.ior
, (
1580 getattr(os
, 'O_CLOEXEC', 0), # UNIX only
1581 getattr(os
, 'O_BINARY', 0), # Windows only
1582 getattr(os
, 'O_NOINHERIT', 0), # Windows only
1583 os
.O_CREAT
if writable
else 0, # O_TRUNC only after locking
1584 os
.O_APPEND
if 'a' in mode
else 0,
1585 os
.O_EXCL
if 'x' in mode
else 0,
1586 os
.O_RDONLY
if not writable
else os
.O_RDWR
if readable
else os
.O_WRONLY
,
1589 self
.f
= os
.fdopen(os
.open(filename
, flags
, 0o666), mode
, encoding
=encoding
)
1591 def __enter__(self
):
1592 exclusive
= 'r' not in self
.mode
1594 _lock_file(self
.f
, exclusive
, self
.block
)
1599 if 'w' in self
.mode
:
1602 except OSError as e
:
1604 errno
.ESPIPE
, # Illegal seek - expected for FIFO
1605 errno
.EINVAL
, # Invalid argument - expected for /dev/null
1614 _unlock_file(self
.f
)
1618 def __exit__(self
, *_
):
1627 def __getattr__(self
, attr
):
1628 return getattr(self
.f
, attr
)
def get_filesystem_encoding():
    """Return the filesystem encoding reported by `sys`, falling back to 'utf-8' if it is None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1640 def shell_quote(args
):
1642 encoding
= get_filesystem_encoding()
1644 if isinstance(a
, bytes):
1645 # We may get a filename encoded with 'encodeFilename'
1646 a
= a
.decode(encoding
)
1647 quoted_args
.append(compat_shlex_quote(a
))
1648 return ' '.join(quoted_args
)
1651 def smuggle_url(url
, data
):
1652 """ Pass additional data in a URL for internal use. """
1654 url
, idata
= unsmuggle_url(url
, {})
1656 sdata
= urllib
.parse
.urlencode(
1657 {'__youtubedl_smuggle': json.dumps(data)}
)
1658 return url
+ '#' + sdata
1661 def unsmuggle_url(smug_url
, default
=None):
1662 if '#__youtubedl_smuggle' not in smug_url
:
1663 return smug_url
, default
1664 url
, _
, sdata
= smug_url
.rpartition('#')
1665 jsond
= urllib
.parse
.parse_qs(sdata
)['__youtubedl_smuggle'][0]
1666 data
= json
.loads(jsond
)
1670 def format_decimal_suffix(num
, fmt
='%d%s', *, factor
=1000):
1671 """ Formats numbers with decimal suffixes like K, M, etc """
1672 num
, factor
= float_or_none(num
), float(factor
)
1673 if num
is None or num
< 0:
1675 POSSIBLE_SUFFIXES
= 'kMGTPEZY'
1676 exponent
= 0 if num
== 0 else min(int(math
.log(num
, factor
)), len(POSSIBLE_SUFFIXES
))
1677 suffix
= ['', *POSSIBLE_SUFFIXES
][exponent
]
1679 suffix
= {'k': 'Ki', '': ''}
.get(suffix
, f
'{suffix}i')
1680 converted
= num
/ (factor
** exponent
)
1681 return fmt
% (converted
, suffix
)
def format_bytes(bytes):
    """Format a byte count with binary (factor 1024) suffixes and two decimals, or 'N/A' if that yields nothing"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
1688 def lookup_unit_table(unit_table
, s
, strict
=False):
1689 num_re
= NUMBER_RE
if strict
else NUMBER_RE
.replace(R
'\.', '[,.]')
1690 units_re
= '|'.join(re
.escape(u
) for u
in unit_table
)
1691 m
= (re
.fullmatch
if strict
else re
.match
)(
1692 rf
'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s
)
1696 num
= float(m
.group('num').replace(',', '.'))
1697 mult
= unit_table
[m
.group('unit')]
1698 return round(num
* mult
)
1702 """Parse a string indicating a byte quantity into an integer"""
1703 return lookup_unit_table(
1704 {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])}
,
1705 s
.upper(), strict
=True)
1708 def parse_filesize(s
):
1712 # The lower-case forms are of course incorrect and unofficial,
1713 # but we support those too
1730 'megabytes': 1000 ** 2,
1731 'mebibytes': 1024 ** 2,
1737 'gigabytes': 1000 ** 3,
1738 'gibibytes': 1024 ** 3,
1744 'terabytes': 1000 ** 4,
1745 'tebibytes': 1024 ** 4,
1751 'petabytes': 1000 ** 5,
1752 'pebibytes': 1024 ** 5,
1758 'exabytes': 1000 ** 6,
1759 'exbibytes': 1024 ** 6,
1765 'zettabytes': 1000 ** 7,
1766 'zebibytes': 1024 ** 7,
1772 'yottabytes': 1000 ** 8,
1773 'yobibytes': 1024 ** 8,
1776 return lookup_unit_table(_UNIT_TABLE
, s
)
1783 s
= re
.sub(r
'^[^\d]+\s', '', s
).strip()
1785 if re
.match(r
'^[\d,.]+$', s
):
1786 return str_to_int(s
)
1799 ret
= lookup_unit_table(_UNIT_TABLE
, s
)
1803 mobj
= re
.match(r
'([\d,.]+)(?:$|\s)', s
)
1805 return str_to_int(mobj
.group(1))
1808 def parse_resolution(s
, *, lenient
=False):
1813 mobj
= re
.search(r
'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s
)
1815 mobj
= re
.search(r
'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s
)
1818 'width': int(mobj
.group('w')),
1819 'height': int(mobj
.group('h')),
1822 mobj
= re
.search(r
'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s
)
1824 return {'height': int(mobj.group(1))}
1826 mobj
= re
.search(r
'\b([48])[kK]\b', s
)
1828 return {'height': int(mobj.group(1)) * 540}
1833 def parse_bitrate(s
):
1834 if not isinstance(s
, str):
1836 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
1838 return int(mobj
.group(1))
1841 def month_by_name(name
, lang
='en'):
1842 """ Return the number of a month by (locale-independently) English name """
1844 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
1847 return month_names
.index(name
) + 1
1852 def month_by_abbreviation(abbrev
):
1853 """ Return the number of a month by (locale-independently) English
1857 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
1862 def fix_xml_ampersands(xml_str
):
1863 """Replace all the '&' by '&amp;' in XML"""
1865 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1870 def setproctitle(title
):
1871 assert isinstance(title
, str)
1873 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
1880 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
1884 # LoadLibrary in Windows Python 2.7.13 only expects
1885 # a bytestring, but since unicode_literals turns
1886 # every string into a unicode string, it fails.
1888 title_bytes
= title
.encode()
1889 buf
= ctypes
.create_string_buffer(len(title_bytes
))
1890 buf
.value
= title_bytes
1892 # PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
1893 libc
.prctl(15, buf
, 0, 0, 0)
1894 except AttributeError:
1895 return # Strange libc, just skip this
def remove_start(s, start):
    """Strip the prefix `start` from `s` if present; `s` (including None) is returned unchanged otherwise"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Strip the suffix `end` from `s` if present

    @param s    String to process; None is passed through unchanged
    @param end  Suffix to remove
    @returns    `s` without the trailing `end`, or `s` itself when it does not
                end with `end` (or when `end` is empty)
    """
    if s is None or not s.endswith(end):
        return s
    # Every string "ends with" the empty string, and `s[:-0]` is `s[:0]`,
    # which would wrongly erase the whole string
    return s[:-len(end)] if end else s
1906 def remove_quotes(s
):
1907 if s
is None or len(s
) < 2:
1909 for quote
in ('"', "'", ):
1910 if s
[0] == quote
and s
[-1] == quote
:
def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"

    Returns the network location of `url` without a leading "www.",
    or None when that is empty
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading/trailing slashes, query and fragment"""
    trimmed_path = urllib.parse.urlparse(url).path.strip('/')
    # Everything after the last '/' (or the whole string if there is none)
    return trimmed_path.rpartition('/')[2]
1929 return re
.match(r
'https?://[^?#]+/', url
).group()
1932 def urljoin(base
, path
):
1933 if isinstance(path
, bytes):
1934 path
= path
.decode()
1935 if not isinstance(path
, str) or not path
:
1937 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
1939 if isinstance(base
, bytes):
1940 base
= base
.decode()
1941 if not isinstance(base
, str) or not re
.match(
1942 r
'^(?:https?:)?//', base
):
1944 return urllib
.parse
.urljoin(base
, path
)
1947 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
1948 if get_attr
and v
is not None:
1949 v
= getattr(v
, get_attr
, None)
1951 return int(v
) * invscale
// scale
1952 except (ValueError, TypeError, OverflowError):
def str_or_none(v, default=None):
    """Convert `v` to str, or return `default` when `v` is None"""
    if v is None:
        return default
    return str(v)
1960 def str_to_int(int_str
):
1961 """ A more relaxed version of int_or_none """
1962 if isinstance(int_str
, int):
1964 elif isinstance(int_str
, str):
1965 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
1966 return int_or_none(int_str
)
1969 def float_or_none(v
, scale
=1, invscale
=1, default
=None):
1973 return float(v
) * invscale
/ scale
1974 except (ValueError, TypeError):
def bool_or_none(v, default=None):
    """Return `v` only if it is an actual bool; anything else yields `default`"""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return `v` with surrounding whitespace stripped if it is a str; otherwise return `default`"""
    if isinstance(v, str):
        return v.strip()
    return default
1986 def url_or_none(url
):
1987 if not url
or not isinstance(url
, str):
1990 return url
if re
.match(r
'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url
) else None
1993 def strftime_or_none(timestamp
, date_format
='%Y%m%d', default
=None):
1994 datetime_object
= None
1996 if isinstance(timestamp
, (int, float)): # unix timestamp
1997 # Using naive datetime here can break timestamp() in Windows
1998 # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
1999 # Also, datetime.datetime.fromtimestamp breaks for negative timestamps
2000 # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
2001 datetime_object
= (datetime
.datetime
.fromtimestamp(0, datetime
.timezone
.utc
)
2002 + datetime
.timedelta(seconds
=timestamp
))
2003 elif isinstance(timestamp
, str): # assume YYYYMMDD
2004 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
2005 date_format
= re
.sub( # Support %s on windows
2006 r
'(?<!%)(%%)*%s', rf
'\g<1>{int(datetime_object.timestamp())}', date_format
)
2007 return datetime_object
.strftime(date_format
)
2008 except (ValueError, TypeError, AttributeError):
2012 def parse_duration(s
):
2013 if not isinstance(s
, str):
2019 days
, hours
, mins
, secs
, ms
= [None] * 5
2020 m
= re
.match(r
'''(?x)
2022 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2023 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2024 (?P<ms>[.:][0-9]+)?Z?$
2027 days
, hours
, mins
, secs
, ms
= m
.group('days', 'hours', 'mins', 'secs', 'ms')
2032 [0-9]+\s*y(?:ears?)?,?\s*
2035 [0-9]+\s*m(?:onths?)?,?\s*
2038 [0-9]+\s*w(?:eeks?)?,?\s*
2041 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2045 (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
2048 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2051 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2054 days
, hours
, mins
, secs
, ms
= m
.groups()
2056 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
2058 hours
, mins
= m
.groups()
2063 ms
= ms
.replace(':', '.')
2064 return sum(float(part
or 0) * mult
for part
, mult
in (
2065 (days
, 86400), (hours
, 3600), (mins
, 60), (secs
, 1), (ms
, 1)))
2068 def prepend_extension(filename
, ext
, expected_real_ext
=None):
2069 name
, real_ext
= os
.path
.splitext(filename
)
2071 f
'{name}.{ext}{real_ext}'
2072 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
2073 else f
'{filename}.{ext}')
2076 def replace_extension(filename
, ext
, expected_real_ext
=None):
2077 name
, real_ext
= os
.path
.splitext(filename
)
2078 return '{}.{}'.format(
2079 name
if not expected_real_ext
or real_ext
[1:] == expected_real_ext
else filename
,
2083 def check_executable(exe
, args
=[]):
2084 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2085 args can be a list of arguments for a short output (like -version) """
2087 Popen
.run([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2093 def _get_exe_version_output(exe
, args
):
2095 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2096 # SIGTTOU if yt-dlp is run in the background.
2097 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2098 stdout
, _
, ret
= Popen
.run([encodeArgument(exe
)] + args
, text
=True,
2099 stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
)
2107 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
2108 assert isinstance(output
, str)
2109 if version_re
is None:
2110 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
2111 m
= re
.search(version_re
, output
)
2118 def get_exe_version(exe
, args
=['--version'],
2119 version_re
=None, unrecognized
=('present', 'broken')):
2120 """ Returns the version of the specified executable,
2121 or False if the executable is not present """
2122 unrecognized
= variadic(unrecognized
)
2123 assert len(unrecognized
) in (1, 2)
2124 out
= _get_exe_version_output(exe
, args
)
2126 return unrecognized
[-1]
2127 return out
and detect_exe_version(out
, version_re
, unrecognized
[0])
2130 def frange(start
=0, stop
=None, step
=1):
2133 start
, stop
= 0, start
2134 sign
= [-1, 1][step
> 0] if step
else 0
2135 while sign
* start
< sign
* stop
:
2140 class LazyList(collections
.abc
.Sequence
):
2141 """Lazy immutable list from an iterable
2142 Note that slices of a LazyList are lists and not LazyList"""
2144 class IndexError(IndexError):
2147 def __init__(self
, iterable
, *, reverse
=False, _cache
=None):
2148 self
._iterable
= iter(iterable
)
2149 self
._cache
= [] if _cache
is None else _cache
2150 self
._reversed
= reverse
2154 # We need to consume the entire iterable to iterate in reverse
2155 yield from self
.exhaust()
2157 yield from self
._cache
2158 for item
in self
._iterable
:
2159 self
._cache
.append(item
)
2163 self
._cache
.extend(self
._iterable
)
2164 self
._iterable
= [] # Discard the emptied iterable to make it pickle-able
2168 """Evaluate the entire iterable"""
2169 return self
._exhaust
()[::-1 if self
._reversed
else 1]
2172 def _reverse_index(x
):
2173 return None if x
is None else ~x
2175 def __getitem__(self
, idx
):
2176 if isinstance(idx
, slice):
2178 idx
= slice(self
._reverse
_index
(idx
.start
), self
._reverse
_index
(idx
.stop
), -(idx
.step
or 1))
2179 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
2180 elif isinstance(idx
, int):
2182 idx
= self
._reverse
_index
(idx
)
2183 start
, stop
, step
= idx
, idx
, 0
2185 raise TypeError('indices must be integers or slices')
2186 if ((start
or 0) < 0 or (stop
or 0) < 0
2187 or (start
is None and step
< 0)
2188 or (stop
is None and step
> 0)):
2189 # We need to consume the entire iterable to be able to slice from the end
2190 # Obviously, never use this with infinite iterables
2193 return self
._cache
[idx
]
2194 except IndexError as e
:
2195 raise self
.IndexError(e
) from e
2196 n
= max(start
or 0, stop
or 0) - len(self
._cache
) + 1
2198 self
._cache
.extend(itertools
.islice(self
._iterable
, n
))
2200 return self
._cache
[idx
]
2201 except IndexError as e
:
2202 raise self
.IndexError(e
) from e
2206 self
[-1] if self
._reversed
else self
[0]
2207 except self
.IndexError:
2213 return len(self
._cache
)
2215 def __reversed__(self
):
2216 return type(self
)(self
._iterable
, reverse
=not self
._reversed
, _cache
=self
._cache
)
2219 return type(self
)(self
._iterable
, reverse
=self
._reversed
, _cache
=self
._cache
)
2222 # repr and str should mimic a list. So we exhaust the iterable
2223 return repr(self
.exhaust())
2226 return repr(self
.exhaust())
2231 class IndexError(IndexError):
2235 # This is only useful for tests
2236 return len(self
.getslice())
2238 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
2239 self
._pagefunc
= pagefunc
2240 self
._pagesize
= pagesize
2241 self
._pagecount
= float('inf')
2242 self
._use
_cache
= use_cache
2245 def getpage(self
, pagenum
):
2246 page_results
= self
._cache
.get(pagenum
)
2247 if page_results
is None:
2248 page_results
= [] if pagenum
> self
._pagecount
else list(self
._pagefunc
(pagenum
))
2250 self
._cache
[pagenum
] = page_results
2253 def getslice(self
, start
=0, end
=None):
2254 return list(self
._getslice
(start
, end
))
2256 def _getslice(self
, start
, end
):
2257 raise NotImplementedError('This method must be implemented by subclasses')
2259 def __getitem__(self
, idx
):
2260 assert self
._use
_cache
, 'Indexing PagedList requires cache'
2261 if not isinstance(idx
, int) or idx
< 0:
2262 raise TypeError('indices must be non-negative integers')
2263 entries
= self
.getslice(idx
, idx
+ 1)
2265 raise self
.IndexError()
2269 return bool(self
.getslice(0, 1))
2272 class OnDemandPagedList(PagedList
):
2273 """Download pages until a page with less than maximum results"""
2275 def _getslice(self
, start
, end
):
2276 for pagenum
in itertools
.count(start
// self
._pagesize
):
2277 firstid
= pagenum
* self
._pagesize
2278 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
2279 if start
>= nextfirstid
:
2283 start
% self
._pagesize
2284 if firstid
<= start
< nextfirstid
2287 ((end
- 1) % self
._pagesize
) + 1
2288 if (end
is not None and firstid
<= end
<= nextfirstid
)
2292 page_results
= self
.getpage(pagenum
)
2294 self
._pagecount
= pagenum
- 1
2296 if startv
!= 0 or endv
is not None:
2297 page_results
= page_results
[startv
:endv
]
2298 yield from page_results
2300 # A little optimization - if current page is not "full", ie. does
2301 # not contain page_size videos then we can assume that this page
2302 # is the last one - there are no more ids on further pages -
2303 # i.e. no need to query again.
2304 if len(page_results
) + startv
< self
._pagesize
:
2307 # If we got the whole page, but the next page is not interesting,
2308 # break out early as well
2309 if end
== nextfirstid
:
2313 class InAdvancePagedList(PagedList
):
2314 """PagedList with total number of pages known in advance"""
2316 def __init__(self
, pagefunc
, pagecount
, pagesize
):
2317 PagedList
.__init
__(self
, pagefunc
, pagesize
, True)
2318 self
._pagecount
= pagecount
2320 def _getslice(self
, start
, end
):
2321 start_page
= start
// self
._pagesize
2322 end_page
= self
._pagecount
if end
is None else min(self
._pagecount
, end
// self
._pagesize
+ 1)
2323 skip_elems
= start
- start_page
* self
._pagesize
2324 only_more
= None if end
is None else end
- start
2325 for pagenum
in range(start_page
, end_page
):
2326 page_results
= self
.getpage(pagenum
)
2328 page_results
= page_results
[skip_elems
:]
2330 if only_more
is not None:
2331 if len(page_results
) < only_more
:
2332 only_more
-= len(page_results
)
2334 yield from page_results
[:only_more
]
2336 yield from page_results
2339 class PlaylistEntries
:
2340 MissingEntry
= object()
2341 is_exhausted
= False
2343 def __init__(self
, ydl
, info_dict
):
2346 # _entries must be assigned now since infodict can change during iteration
2347 entries
= info_dict
.get('entries')
2349 raise EntryNotInPlaylist('There are no entries')
2350 elif isinstance(entries
, list):
2351 self
.is_exhausted
= True
2353 requested_entries
= info_dict
.get('requested_entries')
2354 self
.is_incomplete
= requested_entries
is not None
2355 if self
.is_incomplete
:
2356 assert self
.is_exhausted
2357 self
._entries
= [self
.MissingEntry
] * max(requested_entries
or [0])
2358 for i
, entry
in zip(requested_entries
, entries
):
2359 self
._entries
[i
- 1] = entry
2360 elif isinstance(entries
, (list, PagedList
, LazyList
)):
2361 self
._entries
= entries
2363 self
._entries
= LazyList(entries
)
2365 PLAYLIST_ITEMS_RE
= re
.compile(r
'''(?x)
2366 (?P<start>[+-]?\d+)?
2368 (?P<end>[+-]?\d+|inf(?:inite)?)?
2369 (?::(?P<step>[+-]?\d+))?
2373 def parse_playlist_items(cls
, string
):
2374 for segment
in string
.split(','):
2376 raise ValueError('There is two or more consecutive commas')
2377 mobj
= cls
.PLAYLIST_ITEMS_RE
.fullmatch(segment
)
2379 raise ValueError(f
'{segment!r} is not a valid specification')
2380 start
, end
, step
, has_range
= mobj
.group('start', 'end', 'step', 'range')
2381 if int_or_none(step
) == 0:
2382 raise ValueError(f
'Step in {segment!r} cannot be zero')
2383 yield slice(int_or_none(start
), float_or_none(end
), int_or_none(step
)) if has_range
else int(start
)
2385 def get_requested_items(self
):
2386 playlist_items
= self
.ydl
.params
.get('playlist_items')
2387 playlist_start
= self
.ydl
.params
.get('playliststart', 1)
2388 playlist_end
= self
.ydl
.params
.get('playlistend')
2389 # For backwards compatibility, interpret -1 as whole list
2390 if playlist_end
in (-1, None):
2392 if not playlist_items
:
2393 playlist_items
= f
'{playlist_start}:{playlist_end}'
2394 elif playlist_start
!= 1 or playlist_end
:
2395 self
.ydl
.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once
=True)
2397 for index
in self
.parse_playlist_items(playlist_items
):
2398 for i
, entry
in self
[index
]:
2403 # The item may have just been added to archive. Don't break due to it
2404 if not self
.ydl
.params
.get('lazy_playlist'):
2405 # TODO: Add auto-generated fields
2406 self
.ydl
._match
_entry
(entry
, incomplete
=True, silent
=True)
2407 except (ExistingVideoReached
, RejectedVideoReached
):
2410 def get_full_count(self
):
2411 if self
.is_exhausted
and not self
.is_incomplete
:
2413 elif isinstance(self
._entries
, InAdvancePagedList
):
2414 if self
._entries
._pagesize
== 1:
2415 return self
._entries
._pagecount
2417 @functools.cached_property
2419 if isinstance(self
._entries
, list):
2422 entry
= self
._entries
[i
]
2424 entry
= self
.MissingEntry
2425 if not self
.is_incomplete
:
2426 raise self
.IndexError()
2427 if entry
is self
.MissingEntry
:
2428 raise EntryNotInPlaylist(f
'Entry {i + 1} cannot be found')
2433 return type(self
.ydl
)._handle
_extraction
_exceptions
(lambda _
, i
: self
._entries
[i
])(self
.ydl
, i
)
2434 except (LazyList
.IndexError, PagedList
.IndexError):
2435 raise self
.IndexError()
2438 def __getitem__(self
, idx
):
2439 if isinstance(idx
, int):
2440 idx
= slice(idx
, idx
)
2442 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2443 step
= 1 if idx
.step
is None else idx
.step
2444 if idx
.start
is None:
2445 start
= 0 if step
> 0 else len(self
) - 1
2447 start
= idx
.start
- 1 if idx
.start
>= 0 else len(self
) + idx
.start
2449 # NB: Do not call len(self) when idx == [:]
2450 if idx
.stop
is None:
2451 stop
= 0 if step
< 0 else float('inf')
2453 stop
= idx
.stop
- 1 if idx
.stop
>= 0 else len(self
) + idx
.stop
2454 stop
+= [-1, 1][step
> 0]
2456 for i
in frange(start
, stop
, step
):
2460 entry
= self
._getter
(i
)
2461 except self
.IndexError:
2462 self
.is_exhausted
= True
2469 return len(tuple(self
[:]))
2471 class IndexError(IndexError):
2475 def uppercase_escape(s
):
2476 unicode_escape
= codecs
.getdecoder('unicode_escape')
2478 r
'\\U[0-9a-fA-F]{8}',
2479 lambda m
: unicode_escape(m
.group(0))[0],
2483 def lowercase_escape(s
):
2484 unicode_escape
= codecs
.getdecoder('unicode_escape')
2486 r
'\\u[0-9a-fA-F]{4}',
2487 lambda m
: unicode_escape(m
.group(0))[0],
def parse_qs(url, **kwargs):
    """Parse the query component of a URL into a dict of value lists.

    @param url      URL string whose query part is to be parsed
    @param kwargs   forwarded unchanged to urllib.parse.parse_qs
    @returns        the result of urllib.parse.parse_qs on the URL's query
    """
    query_string = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query_string, **kwargs)
2495 def read_batch_urls(batch_fd
):
2497 if not isinstance(url
, str):
2498 url
= url
.decode('utf-8', 'replace')
2499 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
2500 for bom
in BOM_UTF8
:
2501 if url
.startswith(bom
):
2502 url
= url
[len(bom
):]
2504 if not url
or url
.startswith(('#', ';', ']')):
2506 # "#" cannot be stripped out since it is part of the URI
2507 # However, it can be safely stripped out if following a whitespace
2508 return re
.split(r
'\s#', url
, 1)[0].rstrip()
2510 with contextlib
.closing(batch_fd
) as fd
:
2511 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes.

    All positional and keyword arguments are passed straight through to
    urllib.parse.urlencode; the resulting string is encoded as ASCII.
    """
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
2518 def update_url(url
, *, query_update
=None, **kwargs
):
2519 """Replace URL components specified by kwargs
2520 @param url str or parse url tuple
2521 @param query_update update query
2524 if isinstance(url
, str):
2525 if not kwargs
and not query_update
:
2528 url
= urllib
.parse
.urlparse(url
)
2530 assert 'query' not in kwargs
, 'query_update and query cannot be specified at the same time'
2531 kwargs
['query'] = urllib
.parse
.urlencode({
2532 **urllib
.parse
.parse_qs(url
.query
),
2535 return urllib
.parse
.urlunparse(url
._replace
(**kwargs
))
def update_url_query(url, query):
    """Shorthand for update_url(url, query_update=query)."""
    return update_url(url=url, query_update=query)
2542 def _multipart_encode_impl(data
, boundary
):
2543 content_type
= 'multipart/form-data; boundary=%s' % boundary
2546 for k
, v
in data
.items():
2547 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
2548 if isinstance(k
, str):
2550 if isinstance(v
, str):
2552 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2553 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2554 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
2555 if boundary
.encode('ascii') in content
:
2556 raise ValueError('Boundary overlaps with data')
2559 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
2561 return out
, content_type
2564 def multipart_encode(data
, boundary
=None):
2566 Encode a dict to RFC 7578-compliant form-data
2569 A dict where keys and values can be either Unicode or bytes-like
2572 If specified a Unicode object, it's used as the boundary. Otherwise
2573 a random boundary is generated.
2575 Reference: https://tools.ietf.org/html/rfc7578
2577 has_specified_boundary
= boundary
is not None
2580 if boundary
is None:
2581 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
2584 out
, content_type
= _multipart_encode_impl(data
, boundary
)
2587 if has_specified_boundary
:
2591 return out
, content_type
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Tell whether x is an instance of allowed_types but not of blocked_types.

    @param allowed_types    type(s) that x must be an instance of
    @param blocked_types    type(s) that x must not be an instance of;
                            when left at NO_DEFAULT, str, bytes and
                            collections.abc.Mapping are blocked
    """
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    # Check the allowed types first so the blocked check is skipped on failure
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked_types)
def variadic(x, allowed_types=NO_DEFAULT):
    """Return x as-is when it is iterable-like, otherwise wrap it in a 1-tuple.

    @param allowed_types    passed to is_iterable_like as blocked_types, i.e.
                            instances of these types are wrapped rather than
                            returned unchanged. Anything that is neither a
                            tuple nor a type triggers a deprecation warning
                            and is converted to a tuple.
    """
    if not isinstance(allowed_types, (tuple, type)):
        deprecation_warning('allowed_types should be a tuple or a type')
        allowed_types = tuple(allowed_types)
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x, )
2607 def try_call(*funcs
, expected_type
=None, args
=[], kwargs
={}):
2610 val
= f(*args
, **kwargs
)
2611 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
2614 if expected_type
is None or isinstance(val
, expected_type
):
def try_get(src, getter, expected_type=None):
    """Apply one or more getter callables to src through try_call.

    @param getter           a single callable or an iterable of callables;
                            it is normalised with variadic() and every
                            callable is handed src as its only argument
    @param expected_type    forwarded to try_call
    @returns                whatever try_call returns
    """
    getters = variadic(getter)
    return try_call(*getters, expected_type=expected_type, args=(src,))
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Build a new dict holding only the items of dct accepted by cndn.

    @param cndn     callable taking (key, value); an item is kept when it
                    returns a truthy result. By default items whose value
                    is None are dropped.
    """
    kept = {}
    for key, value in dct.items():
        if cndn(key, value):
            kept[key] = value
    return kept
2626 def merge_dicts(*dicts
):
2628 for a_dict
in dicts
:
2629 for k
, v
in a_dict
.items():
2630 if (v
is not None and k
not in merged
2631 or isinstance(v
, str) and merged
[k
] == ''):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as a str, decoding it first when it is not one already.

    @param encoding     codec used for decoding; the default is computed by
                        preferredencoding() once, when the function is defined
    @param errors       error handler passed to str() when decoding
    """
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
2649 TV_PARENTAL_GUIDELINES
= {
2659 def parse_age_limit(s
):
2660 # isinstance(False, int) is True. So type() must be used instead
2661 if type(s
) is int: # noqa: E721
2662 return s
if 0 <= s
<= 21 else None
2663 elif not isinstance(s
, str):
2665 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
2667 return int(m
.group('age'))
2670 return US_RATINGS
[s
]
2671 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
2673 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
2677 def strip_jsonp(code
):
2680 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2681 (?:\s*&&\s*(?P=func_name))?
2682 \s*\(\s*(?P<callback_data>.*)\);?
2683 \s*?(?://[^\n]*)*$''',
2684 r
'\g<callback_data>', code
)
2687 def js_to_json(code
, vars={}, *, strict
=False):
2688 # vars is a dict of var, val pairs to substitute
2689 STRING_QUOTES
= '\'"`'
2690 STRING_RE
= '|'.join(rf
'{q}(?:\\.|[^\\{q}])*{q}' for q
in STRING_QUOTES
)
2691 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
2692 SKIP_RE
= fr
'\s*(?:{COMMENT_RE})?\s*'
2694 (fr
'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
2695 (fr
'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
2698 def process_escape(match
):
2699 JSON_PASSTHROUGH_ESCAPES
= R
'"\bfnrtu'
2700 escape
= match
.group(1) or match
.group(2)
2702 return (Rf
'\{escape}' if escape
in JSON_PASSTHROUGH_ESCAPES
2703 else R
'\u00' if escape
== 'x'
2704 else '' if escape
== '\n'
2707 def template_substitute(match
):
2708 evaluated
= js_to_json(match
.group(1), vars, strict
=strict
)
2709 if evaluated
[0] == '"':
2710 return json
.loads(evaluated
)
2715 if v
in ('true', 'false', 'null'):
2717 elif v
in ('undefined', 'void 0'):
2719 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
2722 if v
[0] in STRING_QUOTES
:
2723 v
= re
.sub(r
'(?s)\${([^}]+)}', template_substitute
, v
[1:-1]) if v
[0] == '`' else v
[1:-1]
2724 escaped
= re
.sub(r
'(?s)(")|\\(.)', process_escape
, v
)
2725 return f
'"{escaped}"'
2727 for regex
, base
in INTEGER_TABLE
:
2728 im
= re
.match(regex
, v
)
2730 i
= int(im
.group(1), base
)
2731 return f
'"{i}":' if v
.endswith(':') else str(i
)
2737 except json
.JSONDecodeError
:
2738 return json
.dumps(vars[v
])
2745 raise ValueError(f
'Unknown value: {v}')
2747 def create_map(mobj
):
2748 return json
.dumps(dict(json
.loads(js_to_json(mobj
.group(1) or '[]', vars=vars))))
2750 code
= re
.sub(r
'(?:new\s+)?Array\((.*?)\)', r
'[\g<1>]', code
)
2751 code
= re
.sub(r
'new Map\((\[.*?\])?\)', create_map
, code
)
2753 code
= re
.sub(rf
'new Date\(({STRING_RE})\)', r
'\g<1>', code
)
2754 code
= re
.sub(r
'new \w+\((.*?)\)', lambda m
: json
.dumps(m
.group(0)), code
)
2755 code
= re
.sub(r
'parseInt\([^\d]+(\d+)[^\d]+\)', r
'\1', code
)
2756 code
= re
.sub(r
'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^
)]*["\'])\s*\)', r'\1', code)
2758 return re.sub(rf'''(?sx)
2760 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
2761 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
2762 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
2763 [0-9]+(?={SKIP_RE}:)|
2768 def qualities(quality_ids):
2769 """ Get a numeric quality value out of a list of possible values """
2772 return quality_ids.index(qid)
2778 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
2782 'default': '%(title)s [%(id)s].%(ext)s',
2783 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
2789 'description': 'description',
2790 'annotation': 'annotations.xml',
2791 'infojson': 'info.json',
2794 'pl_thumbnail': None,
2795 'pl_description': 'description',
2796 'pl_infojson': 'info.json',
2799 # As of [1] format syntax is:
2800 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
2801 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
2802 STR_FORMAT_RE_TMPL = r'''(?x)
2803 (?<!%)(?P<prefix>(?:%%)*)
2805 (?P<has_key>\((?P<key>{0})\))?
2807 (?P<conversion>[#0\-+ ]+)?
2809 (?P<precision>\.\d+)?
2810 (?P<len_mod>[hlL])? # unused in python
2811 {1} # conversion type
2816 STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
2819 def limit_length(s, length):
2820 """ Add ellipses to overly long strings """
2825 return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a version string on '-' and '.' and return its parts as a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
2833 def is_outdated_version(version, limit, assume_new=True):
2835 return not assume_new
2837 return version_tuple(version) < version_tuple(limit)
2839 return not assume_new
2842 def ytdl_is_updateable():
2843 """ Returns if yt-dlp can be updated with -U """
2845 from ..update import is_non_updateable
2847 return not is_non_updateable()
def args_to_str(args):
    """Get a short string representation for a subprocess command.

    Each argument is quoted with compat_shlex_quote and the results are
    joined with single spaces.
    """
    return ' '.join(map(compat_shlex_quote, args))
def error_to_str(err):
    """Format an exception as '<exception class name>: <message>'."""
    kind = type(err).__name__
    return f'{kind}: {err}'
2859 def mimetype2ext(mt, default=NO_DEFAULT):
2860 if not isinstance(mt, str):
2861 if default is not NO_DEFAULT:
2878 'x-matroska': 'mkv',
2880 'x-mp4-fragmented': 'mp4',
2885 # application (streaming playlists)
2889 'vnd.apple.mpegurl': 'm3u8',
2890 'vnd.ms-sstr+xml': 'ism',
2891 'x-mpegurl': 'm3u8',
2895 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
2896 # Using .mp3 as it's the most popular one
2897 'audio/mpeg': 'mp3',
2898 'audio/webm': 'webm',
2899 'audio/x-matroska': 'mka',
2900 'audio/x-mpegurl': 'm3u',
2908 'x-realaudio': 'ra',
2919 'vnd.wap.wbmp': 'wbmp',
2926 'filmstrip+json': 'fs',
2927 'smptett+xml': 'tt',
2930 'x-ms-sami': 'sami',
2939 mimetype = mt.partition(';')[0].strip().lower()
2940 _, _, subtype = mimetype.rpartition('/')
2942 ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
2945 elif default is not NO_DEFAULT:
2947 return subtype.replace('+', '.')
2950 def ext2mimetype(ext_or_url):
2953 if '.' not in ext_or_url:
2954 ext_or_url = f'file.{ext_or_url}'
2955 return mimetypes.guess_type(ext_or_url)[0]
2958 def parse_codecs(codecs_str):
2959 # http://tools.ietf.org/html/rfc6381
2962 split_codecs = list(filter(None, map(
2963 str.strip, codecs_str.strip().strip(',').split(','))))
2964 vcodec, acodec, scodec, hdr = None, None, None, None
2965 for full_codec in split_codecs:
2966 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
2967 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
2968 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
2972 if parts[0] in ('dvh1', 'dvhe'):
2974 elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
2976 elif parts[:2] == ['vp9', '2']:
2978 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
2979 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
2980 acodec = acodec or full_codec
2981 elif parts[0] in ('stpp', 'wvtt'):
2982 scodec = scodec or full_codec
2984 write_string(f'WARNING: Unknown codec {full_codec}\n')
2985 if vcodec or acodec or scodec:
2987 'vcodec': vcodec or 'none',
2988 'acodec': acodec or 'none',
2989 'dynamic_range': hdr,
2990 **({'scodec': scodec} if scodec is not None else {}),
2992 elif len(split_codecs) == 2:
2994 'vcodec': split_codecs[0],
2995 'acodec': split_codecs[1],
3000 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3001 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3003 allow_mkv = not preferences or 'mkv' in preferences
3005 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3006 return 'mkv' # TODO: any other format allows this?
3008 # TODO: All codecs supported by parse_codecs isn't handled here
3009 COMPATIBLE_CODECS = {
3011 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
3012 'h264', 'aacl', 'ec-3', # Set in ISM
3015 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3016 'vp9x', 'vp8x', # in the webm spec
3020 sanitize_codec = functools.partial(
3021 try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
3022 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3024 for ext in preferences or COMPATIBLE_CODECS.keys():
3025 codec_set = COMPATIBLE_CODECS.get(ext, set())
3026 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3030 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3033 for ext in preferences or vexts:
3034 current_exts = {ext, *vexts, *aexts}
3035 if ext == 'mkv' or current_exts == {ext} or any(
3036 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3038 return 'mkv' if allow_mkv else preferences[-1]
3041 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
3042 getheader = url_handle.headers.get
3044 cd = getheader('Content-Disposition')
3046 m = re.match(r'attachment;\s*filename="(?P
<filename
>[^
"]+)"', cd)
3048 e = determine_ext(m.group('filename
'), default_ext=None)
3052 meta_ext = getheader('x
-amz
-meta
-name
')
3054 e = meta_ext.rpartition('.')[2]
3058 return mimetype2ext(getheader('Content
-Type
'), default=default)
def encode_data_uri(data, mime_type):
    """Build a base64 ``data:`` URI.

    @param data         bytes-like payload to embed
    @param mime_type    media type placed in front of the ';base64' marker
    @returns            str of the form 'data:<mime_type>;base64,<payload>'
    """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3065 def age_restricted(content_limit, age_limit):
3066 """ Returns True iff the content should be blocked """
3068 if age_limit is None: # No limit set
3070 if content_limit is None:
3071 return False # Content available for everyone
3072 return age_limit < content_limit
3075 # List of known byte-order-marks (BOM)
3077 (b'\xef\xbb\xbf', 'utf
-8'),
3078 (b'\x00\x00\xfe\xff', 'utf
-32-be
'),
3079 (b'\xff\xfe\x00\x00', 'utf
-32-le
'),
3080 (b'\xff\xfe', 'utf
-16-le
'),
3081 (b'\xfe\xff', 'utf
-16-be
'),
3085 def is_html(first_bytes):
3086 """ Detect whether a file contains HTML by examining its first bytes. """
3089 for bom, enc in BOMS:
3090 while first_bytes.startswith(bom):
3091 encoding, first_bytes = enc, first_bytes[len(bom):]
3093 return re.match(r'^\s
*<', first_bytes.decode(encoding, 'replace
'))
3096 def determine_protocol(info_dict):
3097 protocol = info_dict.get('protocol
')
3098 if protocol is not None:
3101 url = sanitize_url(info_dict['url
'])
3102 if url.startswith('rtmp
'):
3104 elif url.startswith('mms
'):
3106 elif url.startswith('rtsp
'):
3109 ext = determine_ext(url)
3111 return 'm3u8
' if info_dict.get('is_live
') else 'm3u8_native
'
3115 return urllib.parse.urlparse(url).scheme
3118 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3119 """ Render a list of rows, each as a list of values.
3120 Text after a \t will be right aligned """
3122 return len(remove_terminal_sequences(string).replace('\t', ''))
3124 def get_max_lens(table):
3125 return [max(width(str(v)) for v in col) for col in zip(*table)]
3127 def filter_using_list(row, filterArray):
3128 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3130 max_lens = get_max_lens(data) if hide_empty else []
3131 header_row = filter_using_list(header_row, max_lens)
3132 data = [filter_using_list(row, max_lens) for row in data]
3134 table = [header_row] + data
3135 max_lens = get_max_lens(table)
3138 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3139 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3141 for pos, text in enumerate(map(str, row)):
3143 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3145 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3146 ret = '\n'.join(''.join(row).rstrip() for row in table)
3150 def _match_one(filter_part, dct, incomplete):
3151 # TODO: Generalize code with YoutubeDL._build_format_filter
3152 STRING_OPERATORS = {
3153 '*=': operator.contains,
3154 '^
=': lambda attr, value: attr.startswith(value),
3155 '$
=': lambda attr, value: attr.endswith(value),
3156 '~
=': lambda attr, value: re.search(value, attr),
3158 COMPARISON_OPERATORS = {
3160 '<=': operator.le, # "<=" must be defined above "<"
3167 if isinstance(incomplete, bool):
3168 is_incomplete = lambda _: incomplete
3170 is_incomplete = lambda k: k in incomplete
3172 operator_rex = re.compile(r'''(?x)
3174 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3176 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3179 ''' % '|
'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3180 m = operator_rex.fullmatch(filter_part.strip())
3183 unnegated_op = COMPARISON_OPERATORS[m['op
']]
3185 op = lambda attr, value: not unnegated_op(attr, value)
3188 comparison_value = m['quotedstrval
'] or m['strval
'] or m['intval
']
3190 comparison_value = comparison_value.replace(r'\
%s' % m['quote
'], m['quote
'])
3191 actual_value = dct.get(m['key
'])
3192 numeric_comparison = None
3193 if isinstance(actual_value, (int, float)):
3194 # If the original field is a string and matching comparisonvalue is
3195 # a number we should respect the origin of the original field
3196 # and process comparison value as a string (see
3197 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3199 numeric_comparison = int(comparison_value)
3201 numeric_comparison = parse_filesize(comparison_value)
3202 if numeric_comparison is None:
3203 numeric_comparison = parse_filesize(f'{comparison_value}B
')
3204 if numeric_comparison is None:
3205 numeric_comparison = parse_duration(comparison_value)
3206 if numeric_comparison is not None and m['op
'] in STRING_OPERATORS:
3207 raise ValueError('Operator
%s only supports string values
!' % m['op
'])
3208 if actual_value is None:
3209 return is_incomplete(m['key
']) or m['none_inclusive
']
3210 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3213 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3214 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3216 operator_rex = re.compile(r'''(?x)
3217 (?P<op>%s)\s*(?P<key>[a-z_]+)
3218 ''' % '|
'.join(map(re.escape, UNARY_OPERATORS.keys())))
3219 m = operator_rex.fullmatch(filter_part.strip())
3221 op = UNARY_OPERATORS[m.group('op
')]
3222 actual_value = dct.get(m.group('key
'))
3223 if is_incomplete(m.group('key
')) and actual_value is None:
3225 return op(actual_value)
3227 raise ValueError('Invalid
filter part
%r' % filter_part)
3230 def match_str(filter_str, dct, incomplete=False):
3231 """ Filter a dictionary with a simple string syntax.
3232 @returns Whether the filter passes
3233 @param incomplete Set of keys that is expected to be missing from dct.
3234 Can be True/False to indicate all/none of the keys may be missing.
3235 All conditions on incomplete keys pass if the key is missing
3238 _match_one(filter_part.replace(r'\
&', '&'), dct, incomplete)
3239 for filter_part in re.split(r'(?
<!\\)&', filter_str))
3242 def match_filter_func(filters, breaking_filters=None):
3243 if not filters and not breaking_filters:
3245 repr_ = f'{match_filter_func.__module__}
.{match_filter_func.__qualname__}
({filters}
, {breaking_filters}
)'
3247 breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
3248 filters = set(variadic(filters or []))
3250 interactive = '-' in filters
3254 @function_with_repr.set_repr(repr_)
3255 def _match_func(info_dict, incomplete=False):
3256 ret = breaking_filters(info_dict, incomplete)
3258 raise RejectedVideoReached(ret)
3260 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3261 return NO_DEFAULT if interactive and not incomplete else None
3263 video_title = info_dict.get('title
') or info_dict.get('id') or 'entry
'
3264 filter_str = ') |
('.join(map(str.strip, filters))
3265 return f'{video_title} does
not pass filter ({filter_str}
), skipping
..'
3269 class download_range_func:
3270 def __init__(self, chapters, ranges, from_info=False):
3271 self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
3273 def __call__(self, info_dict, ydl):
3275 warning = ('There are no chapters matching the regex
' if info_dict.get('chapters
')
3276 else 'Cannot match chapters since chapter information
is unavailable
')
3277 for regex in self.chapters or []:
3278 for i, chapter in enumerate(info_dict.get('chapters
') or []):
3279 if re.search(regex, chapter['title
']):
3281 yield {**chapter, 'index': i}
3282 if self.chapters and warning:
3283 ydl.to_screen(f'[info
] {info_dict["id"]}
: {warning}
')
3285 for start, end in self.ranges or []:
3287 'start_time
': self._handle_negative_timestamp(start, info_dict),
3288 'end_time
': self._handle_negative_timestamp(end, info_dict),
3291 if self.from_info and (info_dict.get('start_time
') or info_dict.get('end_time
')):
3293 'start_time
': info_dict.get('start_time
') or 0,
3294 'end_time
': info_dict.get('end_time
') or float('inf
'),
3296 elif not self.ranges and not self.chapters:
3300 def _handle_negative_timestamp(time, info):
3301 return max(info['duration
'] + time, 0) if info.get('duration
') and time < 0 else time
3303 def __eq__(self, other):
3304 return (isinstance(other, download_range_func)
3305 and self.chapters == other.chapters and self.ranges == other.ranges)
3308 return f'{__name__}
.{type(self).__name__}
({self.chapters}
, {self.ranges}
)'
3311 def parse_dfxp_time_expr(time_expr):
3315 mobj = re.match(rf'^
(?P
<time_offset
>{NUMBER_RE}
)s?$
', time_expr)
3317 return float(mobj.group('time_offset
'))
3319 mobj = re.match(r'^
(\d
+):(\d\d
):(\d\
d(?
:(?
:\
.|
:)\d
+)?
)$
', time_expr)
3321 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a time given in seconds as an SRT timecode.

    The value is converted to milliseconds and split by timetuple_from_msec;
    the resulting fields fill the pattern 'NN:NN:NN,NNN'.
    """
    # NOTE(review): the pattern needs exactly four fields from timetuple_from_msec
    time = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % time
def ass_subtitles_timecode(seconds):
    """Format a time given in seconds as an ASS timecode ('N:NN:NN.NN').

    The final field is expressed in hundredths of a second: the milliseconds
    part of the split time is divided by 10 and rendered through '%02d'.
    """
    time = timetuple_from_msec(seconds * 1000)
    leading_fields = time[:-1]  # every field except the last one
    centiseconds = time.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*leading_fields, centiseconds)
3333 def dfxp2srt(dfxp_data):
3335 @param dfxp_data A bytes-like object containing DFXP data
3336 @returns A unicode object containing converted SRT data
3338 LEGACY_NAMESPACES = (
3339 (b'http
://www
.w3
.org
/ns
/ttml
', [
3340 b'http
://www
.w3
.org
/2004/11/ttaf1
',
3341 b'http
://www
.w3
.org
/2006/04/ttaf1
',
3342 b'http
://www
.w3
.org
/2006/10/ttaf1
',
3344 (b'http
://www
.w3
.org
/ns
/ttml
#styling', [
3345 b
'http://www.w3.org/ns/ttml#style',
3349 SUPPORTED_STYLING
= [
3358 _x
= functools
.partial(xpath_with_ns
, ns_map
={
3359 'xml': 'http://www.w3.org/XML/1998/namespace',
3360 'ttml': 'http://www.w3.org/ns/ttml',
3361 'tts': 'http://www.w3.org/ns/ttml#styling',
3367 class TTMLPElementParser
:
3369 _unclosed_elements
= []
3370 _applied_styles
= []
3372 def start(self
, tag
, attrib
):
3373 if tag
in (_x('ttml:br'), 'br'):
3376 unclosed_elements
= []
3378 element_style_id
= attrib
.get('style')
3380 style
.update(default_style
)
3381 if element_style_id
:
3382 style
.update(styles
.get(element_style_id
, {}))
3383 for prop
in SUPPORTED_STYLING
:
3384 prop_val
= attrib
.get(_x('tts:' + prop
))
3386 style
[prop
] = prop_val
3389 for k
, v
in sorted(style
.items()):
3390 if self
._applied
_styles
and self
._applied
_styles
[-1].get(k
) == v
:
3393 font
+= ' color="%s"' % v
3394 elif k
== 'fontSize':
3395 font
+= ' size="%s"' % v
3396 elif k
== 'fontFamily':
3397 font
+= ' face="%s"' % v
3398 elif k
== 'fontWeight' and v
== 'bold':
3400 unclosed_elements
.append('b')
3401 elif k
== 'fontStyle' and v
== 'italic':
3403 unclosed_elements
.append('i')
3404 elif k
== 'textDecoration' and v
== 'underline':
3406 unclosed_elements
.append('u')
3408 self
._out
+= '<font' + font
+ '>'
3409 unclosed_elements
.append('font')
3411 if self
._applied
_styles
:
3412 applied_style
.update(self
._applied
_styles
[-1])
3413 applied_style
.update(style
)
3414 self
._applied
_styles
.append(applied_style
)
3415 self
._unclosed
_elements
.append(unclosed_elements
)
3418 if tag
not in (_x('ttml:br'), 'br'):
3419 unclosed_elements
= self
._unclosed
_elements
.pop()
3420 for element
in reversed(unclosed_elements
):
3421 self
._out
+= '</%s>' % element
3422 if unclosed_elements
and self
._applied
_styles
:
3423 self
._applied
_styles
.pop()
3425 def data(self
, data
):
3429 return self
._out
.strip()
3431 # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
3432 # This will not trigger false positives since only UTF-8 text is being replaced
3433 dfxp_data
= dfxp_data
.replace(b
'encoding=\'UTF-16\'', b
'encoding=\'UTF-8\'')
3435 def parse_node(node
):
3436 target
= TTMLPElementParser()
3437 parser
= xml
.etree
.ElementTree
.XMLParser(target
=target
)
3438 parser
.feed(xml
.etree
.ElementTree
.tostring(node
))
3439 return parser
.close()
3441 for k
, v
in LEGACY_NAMESPACES
:
3443 dfxp_data
= dfxp_data
.replace(ns
, k
)
3445 dfxp
= compat_etree_fromstring(dfxp_data
)
3447 paras
= dfxp
.findall(_x('.//ttml:p')) or dfxp
.findall('.//p')
3450 raise ValueError('Invalid dfxp/TTML subtitle')
3454 for style
in dfxp
.findall(_x('.//ttml:style')):
3455 style_id
= style
.get('id') or style
.get(_x('xml:id'))
3458 parent_style_id
= style
.get('style')
3460 if parent_style_id
not in styles
:
3463 styles
[style_id
] = styles
[parent_style_id
].copy()
3464 for prop
in SUPPORTED_STYLING
:
3465 prop_val
= style
.get(_x('tts:' + prop
))
3467 styles
.setdefault(style_id
, {})[prop
] = prop_val
3473 for p
in ('body', 'div'):
3474 ele
= xpath_element(dfxp
, [_x('.//ttml:' + p
), './/' + p
])
3477 style
= styles
.get(ele
.get('style'))
3480 default_style
.update(style
)
3482 for para
, index
in zip(paras
, itertools
.count(1)):
3483 begin_time
= parse_dfxp_time_expr(para
.attrib
.get('begin'))
3484 end_time
= parse_dfxp_time_expr(para
.attrib
.get('end'))
3485 dur
= parse_dfxp_time_expr(para
.attrib
.get('dur'))
3486 if begin_time
is None:
3491 end_time
= begin_time
+ dur
3492 out
.append('%d\n%s --> %s\n%s\n\n' % (
3494 srt_subtitles_timecode(begin_time
),
3495 srt_subtitles_timecode(end_time
),
def cli_option(params, command_option, param, separator=None):
    """
    Build the command line arguments for a single option taken from `params`

    @param params           mapping of parameter names to values
    @param command_option   the option name to emit, e.g. `--proxy`
    @param param            key to look up in `params`
    @param separator        if given, option and value are joined into one argument
    @returns                [] if the value is None, else [option, value] or ['option<sep>value']
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """
    Build the command line arguments for a boolean option taken from `params`

    The looked-up value must be True, False or None; it is translated to
    `true_value`/`false_value` and then formatted by `cli_option`
    (so None yields an empty list).
    """
    flag = params.get(param)
    assert flag in (True, False, None)
    value_names = {True: true_value, False: false_value}
    return cli_option(value_names, command_option, flag, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if `params[param]` equals `expected_value`, else []"""
    if params.get(param) == expected_value:
        return [command_option]
    return []
3518 def cli_configuration_args(argdict
, keys
, default
=[], use_compat
=True):
3519 if isinstance(argdict
, (list, tuple)): # for backward compatibility
3526 assert isinstance(argdict
, dict)
3528 assert isinstance(keys
, (list, tuple))
3529 for key_list
in keys
:
3530 arg_list
= list(filter(
3531 lambda x
: x
is not None,
3532 [argdict
.get(key
.lower()) for key
in variadic(key_list
)]))
3534 return [arg
for args
in arg_list
for arg
in args
]
3538 def _configuration_args(main_key
, argdict
, exe
, keys
=None, default
=[], use_compat
=True):
3539 main_key
, exe
= main_key
.lower(), exe
.lower()
3540 root_key
= exe
if main_key
== exe
else f
'{main_key}+{exe}'
3541 keys
= [f
'{root_key}{k}' for k
in (keys
or [''])]
3542 if root_key
in keys
:
3544 keys
.append((main_key
, exe
))
3545 keys
.append('default')
3548 return cli_configuration_args(argdict
, keys
, default
, use_compat
)
3552 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3611 'iw': 'heb', # Replaced by he in 1989 revision
3621 'in': 'ind', # Replaced by id in 1989 revision
3737 'ji': 'yid', # Replaced by yi in 1989 revision
3745 def short2long(cls
, code
):
3746 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3747 return cls
._lang
_map
.get(code
[:2])
3750 def long2short(cls
, code
):
3751 """Convert language code from ISO 639-2/T to ISO 639-1"""
3752 for short_name
, long_name
in cls
._lang
_map
.items():
3753 if long_name
== code
:
3758 # From http://data.okfn.org/data/core/country-list
3760 'AF': 'Afghanistan',
3761 'AX': 'Åland Islands',
3764 'AS': 'American Samoa',
3769 'AG': 'Antigua and Barbuda',
3786 'BO': 'Bolivia, Plurinational State of',
3787 'BQ': 'Bonaire, Sint Eustatius and Saba',
3788 'BA': 'Bosnia and Herzegovina',
3790 'BV': 'Bouvet Island',
3792 'IO': 'British Indian Ocean Territory',
3793 'BN': 'Brunei Darussalam',
3795 'BF': 'Burkina Faso',
3801 'KY': 'Cayman Islands',
3802 'CF': 'Central African Republic',
3806 'CX': 'Christmas Island',
3807 'CC': 'Cocos (Keeling) Islands',
3811 'CD': 'Congo, the Democratic Republic of the',
3812 'CK': 'Cook Islands',
3814 'CI': 'Côte d\'Ivoire',
3819 'CZ': 'Czech Republic',
3823 'DO': 'Dominican Republic',
3826 'SV': 'El Salvador',
3827 'GQ': 'Equatorial Guinea',
3831 'FK': 'Falkland Islands (Malvinas)',
3832 'FO': 'Faroe Islands',
3836 'GF': 'French Guiana',
3837 'PF': 'French Polynesia',
3838 'TF': 'French Southern Territories',
3853 'GW': 'Guinea-Bissau',
3856 'HM': 'Heard Island and McDonald Islands',
3857 'VA': 'Holy See (Vatican City State)',
3864 'IR': 'Iran, Islamic Republic of',
3867 'IM': 'Isle of Man',
3877 'KP': 'Korea, Democratic People\'s Republic of',
3878 'KR': 'Korea, Republic of',
3881 'LA': 'Lao People\'s Democratic Republic',
3887 'LI': 'Liechtenstein',
3891 'MK': 'Macedonia, the Former Yugoslav Republic of',
3898 'MH': 'Marshall Islands',
3904 'FM': 'Micronesia, Federated States of',
3905 'MD': 'Moldova, Republic of',
3916 'NL': 'Netherlands',
3917 'NC': 'New Caledonia',
3918 'NZ': 'New Zealand',
3923 'NF': 'Norfolk Island',
3924 'MP': 'Northern Mariana Islands',
3929 'PS': 'Palestine, State of',
3931 'PG': 'Papua New Guinea',
3934 'PH': 'Philippines',
3938 'PR': 'Puerto Rico',
3942 'RU': 'Russian Federation',
3944 'BL': 'Saint Barthélemy',
3945 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
3946 'KN': 'Saint Kitts and Nevis',
3947 'LC': 'Saint Lucia',
3948 'MF': 'Saint Martin (French part)',
3949 'PM': 'Saint Pierre and Miquelon',
3950 'VC': 'Saint Vincent and the Grenadines',
3953 'ST': 'Sao Tome and Principe',
3954 'SA': 'Saudi Arabia',
3958 'SL': 'Sierra Leone',
3960 'SX': 'Sint Maarten (Dutch part)',
3963 'SB': 'Solomon Islands',
3965 'ZA': 'South Africa',
3966 'GS': 'South Georgia and the South Sandwich Islands',
3967 'SS': 'South Sudan',
3972 'SJ': 'Svalbard and Jan Mayen',
3975 'CH': 'Switzerland',
3976 'SY': 'Syrian Arab Republic',
3977 'TW': 'Taiwan, Province of China',
3979 'TZ': 'Tanzania, United Republic of',
3981 'TL': 'Timor-Leste',
3985 'TT': 'Trinidad and Tobago',
3988 'TM': 'Turkmenistan',
3989 'TC': 'Turks and Caicos Islands',
3993 'AE': 'United Arab Emirates',
3994 'GB': 'United Kingdom',
3995 'US': 'United States',
3996 'UM': 'United States Minor Outlying Islands',
4000 'VE': 'Venezuela, Bolivarian Republic of',
4002 'VG': 'Virgin Islands, British',
4003 'VI': 'Virgin Islands, U.S.',
4004 'WF': 'Wallis and Futuna',
4005 'EH': 'Western Sahara',
4009 # Not ISO 3166 codes, but used for IP blocks
4010 'AP': 'Asia/Pacific Region',
4015 def short2full(cls
, code
):
4016 """Convert an ISO 3166-2 country code to the corresponding full name"""
4017 return cls
._country
_map
.get(code
.upper())
4021 # Major IPv4 address blocks per country
4023 'AD': '46.172.224.0/19',
4024 'AE': '94.200.0.0/13',
4025 'AF': '149.54.0.0/17',
4026 'AG': '209.59.64.0/18',
4027 'AI': '204.14.248.0/21',
4028 'AL': '46.99.0.0/16',
4029 'AM': '46.70.0.0/15',
4030 'AO': '105.168.0.0/13',
4031 'AP': '182.50.184.0/21',
4032 'AQ': '23.154.160.0/24',
4033 'AR': '181.0.0.0/12',
4034 'AS': '202.70.112.0/20',
4035 'AT': '77.116.0.0/14',
4036 'AU': '1.128.0.0/11',
4037 'AW': '181.41.0.0/18',
4038 'AX': '185.217.4.0/22',
4039 'AZ': '5.197.0.0/16',
4040 'BA': '31.176.128.0/17',
4041 'BB': '65.48.128.0/17',
4042 'BD': '114.130.0.0/16',
4044 'BF': '102.178.0.0/15',
4045 'BG': '95.42.0.0/15',
4046 'BH': '37.131.0.0/17',
4047 'BI': '154.117.192.0/18',
4048 'BJ': '137.255.0.0/16',
4049 'BL': '185.212.72.0/23',
4050 'BM': '196.12.64.0/18',
4051 'BN': '156.31.0.0/16',
4052 'BO': '161.56.0.0/16',
4053 'BQ': '161.0.80.0/20',
4054 'BR': '191.128.0.0/12',
4055 'BS': '24.51.64.0/18',
4056 'BT': '119.2.96.0/19',
4057 'BW': '168.167.0.0/16',
4058 'BY': '178.120.0.0/13',
4059 'BZ': '179.42.192.0/18',
4060 'CA': '99.224.0.0/11',
4061 'CD': '41.243.0.0/16',
4062 'CF': '197.242.176.0/21',
4063 'CG': '160.113.0.0/16',
4064 'CH': '85.0.0.0/13',
4065 'CI': '102.136.0.0/14',
4066 'CK': '202.65.32.0/19',
4067 'CL': '152.172.0.0/14',
4068 'CM': '102.244.0.0/14',
4069 'CN': '36.128.0.0/10',
4070 'CO': '181.240.0.0/12',
4071 'CR': '201.192.0.0/12',
4072 'CU': '152.206.0.0/15',
4073 'CV': '165.90.96.0/19',
4074 'CW': '190.88.128.0/17',
4075 'CY': '31.153.0.0/16',
4076 'CZ': '88.100.0.0/14',
4078 'DJ': '197.241.0.0/17',
4079 'DK': '87.48.0.0/12',
4080 'DM': '192.243.48.0/20',
4081 'DO': '152.166.0.0/15',
4082 'DZ': '41.96.0.0/12',
4083 'EC': '186.68.0.0/15',
4084 'EE': '90.190.0.0/15',
4085 'EG': '156.160.0.0/11',
4086 'ER': '196.200.96.0/20',
4087 'ES': '88.0.0.0/11',
4088 'ET': '196.188.0.0/14',
4089 'EU': '2.16.0.0/13',
4090 'FI': '91.152.0.0/13',
4091 'FJ': '144.120.0.0/16',
4092 'FK': '80.73.208.0/21',
4093 'FM': '119.252.112.0/20',
4094 'FO': '88.85.32.0/19',
4096 'GA': '41.158.0.0/15',
4098 'GD': '74.122.88.0/21',
4099 'GE': '31.146.0.0/16',
4100 'GF': '161.22.64.0/18',
4101 'GG': '62.68.160.0/19',
4102 'GH': '154.160.0.0/12',
4103 'GI': '95.164.0.0/16',
4104 'GL': '88.83.0.0/19',
4105 'GM': '160.182.0.0/15',
4106 'GN': '197.149.192.0/18',
4107 'GP': '104.250.0.0/19',
4108 'GQ': '105.235.224.0/20',
4109 'GR': '94.64.0.0/13',
4110 'GT': '168.234.0.0/16',
4111 'GU': '168.123.0.0/16',
4112 'GW': '197.214.80.0/20',
4113 'GY': '181.41.64.0/18',
4114 'HK': '113.252.0.0/14',
4115 'HN': '181.210.0.0/16',
4116 'HR': '93.136.0.0/13',
4117 'HT': '148.102.128.0/17',
4118 'HU': '84.0.0.0/14',
4119 'ID': '39.192.0.0/10',
4120 'IE': '87.32.0.0/12',
4121 'IL': '79.176.0.0/13',
4122 'IM': '5.62.80.0/20',
4123 'IN': '117.192.0.0/10',
4124 'IO': '203.83.48.0/21',
4125 'IQ': '37.236.0.0/14',
4126 'IR': '2.176.0.0/12',
4127 'IS': '82.221.0.0/16',
4128 'IT': '79.0.0.0/10',
4129 'JE': '87.244.64.0/18',
4130 'JM': '72.27.0.0/17',
4131 'JO': '176.29.0.0/16',
4132 'JP': '133.0.0.0/8',
4133 'KE': '105.48.0.0/12',
4134 'KG': '158.181.128.0/17',
4135 'KH': '36.37.128.0/17',
4136 'KI': '103.25.140.0/22',
4137 'KM': '197.255.224.0/20',
4138 'KN': '198.167.192.0/19',
4139 'KP': '175.45.176.0/22',
4140 'KR': '175.192.0.0/10',
4141 'KW': '37.36.0.0/14',
4142 'KY': '64.96.0.0/15',
4143 'KZ': '2.72.0.0/13',
4144 'LA': '115.84.64.0/18',
4145 'LB': '178.135.0.0/16',
4146 'LC': '24.92.144.0/20',
4147 'LI': '82.117.0.0/19',
4148 'LK': '112.134.0.0/15',
4149 'LR': '102.183.0.0/16',
4150 'LS': '129.232.0.0/17',
4151 'LT': '78.56.0.0/13',
4152 'LU': '188.42.0.0/16',
4153 'LV': '46.109.0.0/16',
4154 'LY': '41.252.0.0/14',
4155 'MA': '105.128.0.0/11',
4156 'MC': '88.209.64.0/18',
4157 'MD': '37.246.0.0/16',
4158 'ME': '178.175.0.0/17',
4159 'MF': '74.112.232.0/21',
4160 'MG': '154.126.0.0/17',
4161 'MH': '117.103.88.0/21',
4162 'MK': '77.28.0.0/15',
4163 'ML': '154.118.128.0/18',
4164 'MM': '37.111.0.0/17',
4165 'MN': '49.0.128.0/17',
4166 'MO': '60.246.0.0/16',
4167 'MP': '202.88.64.0/20',
4168 'MQ': '109.203.224.0/19',
4169 'MR': '41.188.64.0/18',
4170 'MS': '208.90.112.0/22',
4171 'MT': '46.11.0.0/16',
4172 'MU': '105.16.0.0/12',
4173 'MV': '27.114.128.0/18',
4174 'MW': '102.70.0.0/15',
4175 'MX': '187.192.0.0/11',
4176 'MY': '175.136.0.0/13',
4177 'MZ': '197.218.0.0/15',
4178 'NA': '41.182.0.0/16',
4179 'NC': '101.101.0.0/18',
4180 'NE': '197.214.0.0/18',
4181 'NF': '203.17.240.0/22',
4182 'NG': '105.112.0.0/12',
4183 'NI': '186.76.0.0/15',
4184 'NL': '145.96.0.0/11',
4185 'NO': '84.208.0.0/13',
4186 'NP': '36.252.0.0/15',
4187 'NR': '203.98.224.0/19',
4188 'NU': '49.156.48.0/22',
4189 'NZ': '49.224.0.0/14',
4190 'OM': '5.36.0.0/15',
4191 'PA': '186.72.0.0/15',
4192 'PE': '186.160.0.0/14',
4193 'PF': '123.50.64.0/18',
4194 'PG': '124.240.192.0/19',
4195 'PH': '49.144.0.0/13',
4196 'PK': '39.32.0.0/11',
4197 'PL': '83.0.0.0/11',
4198 'PM': '70.36.0.0/20',
4199 'PR': '66.50.0.0/16',
4200 'PS': '188.161.0.0/16',
4201 'PT': '85.240.0.0/13',
4202 'PW': '202.124.224.0/20',
4203 'PY': '181.120.0.0/14',
4204 'QA': '37.210.0.0/15',
4205 'RE': '102.35.0.0/16',
4206 'RO': '79.112.0.0/13',
4207 'RS': '93.86.0.0/15',
4208 'RU': '5.136.0.0/13',
4209 'RW': '41.186.0.0/16',
4210 'SA': '188.48.0.0/13',
4211 'SB': '202.1.160.0/19',
4212 'SC': '154.192.0.0/11',
4213 'SD': '102.120.0.0/13',
4214 'SE': '78.64.0.0/12',
4215 'SG': '8.128.0.0/10',
4216 'SI': '188.196.0.0/14',
4217 'SK': '78.98.0.0/15',
4218 'SL': '102.143.0.0/17',
4219 'SM': '89.186.32.0/19',
4220 'SN': '41.82.0.0/15',
4221 'SO': '154.115.192.0/18',
4222 'SR': '186.179.128.0/17',
4223 'SS': '105.235.208.0/21',
4224 'ST': '197.159.160.0/19',
4225 'SV': '168.243.0.0/16',
4226 'SX': '190.102.0.0/20',
4228 'SZ': '41.84.224.0/19',
4229 'TC': '65.255.48.0/20',
4230 'TD': '154.68.128.0/19',
4231 'TG': '196.168.0.0/14',
4232 'TH': '171.96.0.0/13',
4233 'TJ': '85.9.128.0/18',
4234 'TK': '27.96.24.0/21',
4235 'TL': '180.189.160.0/20',
4236 'TM': '95.85.96.0/19',
4237 'TN': '197.0.0.0/11',
4238 'TO': '175.176.144.0/21',
4239 'TR': '78.160.0.0/11',
4240 'TT': '186.44.0.0/15',
4241 'TV': '202.2.96.0/19',
4242 'TW': '120.96.0.0/11',
4243 'TZ': '156.156.0.0/14',
4244 'UA': '37.52.0.0/14',
4245 'UG': '102.80.0.0/13',
4247 'UY': '167.56.0.0/13',
4248 'UZ': '84.54.64.0/18',
4249 'VA': '212.77.0.0/19',
4250 'VC': '207.191.240.0/21',
4251 'VE': '186.88.0.0/13',
4252 'VG': '66.81.192.0/20',
4253 'VI': '146.226.0.0/16',
4254 'VN': '14.160.0.0/11',
4255 'VU': '202.80.32.0/20',
4256 'WF': '117.20.32.0/21',
4257 'WS': '202.4.32.0/19',
4258 'YE': '134.35.0.0/16',
4259 'YT': '41.242.116.0/22',
4260 'ZA': '41.0.0.0/11',
4261 'ZM': '102.144.0.0/13',
4262 'ZW': '102.177.192.0/18',
4266 def random_ipv4(cls
, code_or_block
):
4267 if len(code_or_block
) == 2:
4268 block
= cls
._country
_ip
_map
.get(code_or_block
.upper())
4272 block
= code_or_block
4273 addr
, preflen
= block
.split('/')
4274 addr_min
= struct
.unpack('!L', socket
.inet_aton(addr
))[0]
4275 addr_max
= addr_min |
(0xffffffff >> int(preflen
))
4276 return str(socket
.inet_ntoa(
4277 struct
.pack('!L', random
.randint(addr_min
, addr_max
))))
4280 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4281 # released into Public Domain
4282 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4284 def long_to_bytes(n
, blocksize
=0):
4285 """long_to_bytes(n:long, blocksize:int) : string
4286 Convert a long integer to a byte string.
4288 If optional blocksize is given and greater than zero, pad the front of the
4289 byte string with binary zeros so that the length is a multiple of
4292 # after much testing, this algorithm was deemed to be the fastest
4296 s
= struct
.pack('>I', n
& 0xffffffff) + s
4298 # strip off leading zeros
4299 for i
in range(len(s
)):
4300 if s
[i
] != b
'\000'[0]:
4303 # only happens when n == 0
4307 # add back some pad bytes. this could be done more efficiently w.r.t. the
4308 # de-padding being done above, but sigh...
4309 if blocksize
> 0 and len(s
) % blocksize
:
4310 s
= (blocksize
- len(s
) % blocksize
) * b
'\000' + s
4314 def bytes_to_long(s
):
4315 """bytes_to_long(string) : long
4316 Convert a byte string to a long integer.
4318 This is (essentially) the inverse of long_to_bytes().
4323 extra
= (4 - length
% 4)
4324 s
= b
'\000' * extra
+ s
4325 length
= length
+ extra
4326 for i
in range(0, length
, 4):
4327 acc
= (acc
<< 32) + struct
.unpack('>I', s
[i
:i
+ 4])[0]
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The bytes are reversed before conversion, i.e. the input is read as a little-endian integer
    little_endian_hex = binascii.hexlify(data[::-1])
    plaintext = int(little_endian_hex, 16)
    return '%x' % pow(plaintext, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as `[0, 2] + padding + [0] + data`, where the
    padding consists of random non-zero values.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if `data` is longer than `length - 11`
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be non-zero: the first zero after the [0, 2] header
    # is the separator that marks where the payload begins
    # NOTE: `random` is not a cryptographically secure source
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2, *pseudo_random, 0, *data]
4363 def _base_n_table(n
, table
):
4364 if not table
and not n
:
4365 raise ValueError('Either table or n must be specified')
4366 table
= (table
or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n
]
4368 if n
and n
!= len(table
):
4369 raise ValueError(f
'base {n} exceeds table length {len(table)}')
4373 def encode_base_n(num
, n
=None, table
=None):
4374 """Convert given int to a base-n string"""
4375 table
= _base_n_table(n
, table
)
4379 result
, base
= '', len(table
)
4381 result
= table
[num
% base
] + result
4386 def decode_base_n(string
, n
=None, table
=None):
4387 """Convert given base-n string to int"""
4388 table
= {char: index for index, char in enumerate(_base_n_table(n, table))}
4389 result
, base
= 0, len(table
)
4391 result
= result
* base
+ table
[char
]
4395 def decode_packed_codes(code
):
4396 mobj
= re
.search(PACKED_CODES_RE
, code
)
4397 obfuscated_code
, base
, count
, symbols
= mobj
.groups()
4400 symbols
= symbols
.split('|')
4405 base_n_count
= encode_base_n(count
, base
)
4406 symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
4409 r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)],
4413 def caesar(s
, alphabet
, shift
):
4418 alphabet
[(alphabet
.index(c
) + shift
) % l
] if c
in alphabet
else c
4423 return caesar(s
, r
'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4426 def parse_m3u8_attributes(attrib
):
4428 for (key
, val
) in re
.findall(r
'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib
):
4429 if val
.startswith('"'):
def urshift(val, n):
    """
    Unsigned (logical) right shift of a 32-bit value

    Negative values are first mapped to their unsigned 32-bit equivalent
    by adding 2**32.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
4439 def write_xattr(path
, key
, value
):
4440 # Windows: Write xattrs to NTFS Alternate Data Streams:
4441 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4442 if compat_os_name
== 'nt':
4443 assert ':' not in key
4444 assert os
.path
.exists(path
)
4447 with open(f
'{path}:{key}', 'wb') as f
:
4449 except OSError as e
:
4450 raise XAttrMetadataError(e
.errno
, e
.strerror
)
4453 # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
4456 if callable(getattr(os
, 'setxattr', None)):
4457 setxattr
= os
.setxattr
4458 elif getattr(xattr
, '_yt_dlp__identifier', None) == 'pyxattr':
4459 # Unicode arguments are not supported in pyxattr until version 0.5.0
4460 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4461 if version_tuple(xattr
.__version
__) >= (0, 5, 0):
4462 setxattr
= xattr
.set
4464 setxattr
= xattr
.setxattr
4468 setxattr(path
, key
, value
)
4469 except OSError as e
:
4470 raise XAttrMetadataError(e
.errno
, e
.strerror
)
4473 # UNIX Method 2. Use setfattr/xattr executables
4474 exe
= ('setfattr' if check_executable('setfattr', ['--version'])
4475 else 'xattr' if check_executable('xattr', ['-h']) else None)
4477 raise XAttrUnavailableError(
4478 'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
4479 + ('"xattr" binary' if sys
.platform
!= 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4481 value
= value
.decode()
4483 _
, stderr
, returncode
= Popen
.run(
4484 [exe
, '-w', key
, value
, path
] if exe
== 'xattr' else [exe
, '-n', key
, '-v', value
, path
],
4485 text
=True, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
4486 except OSError as e
:
4487 raise XAttrMetadataError(e
.errno
, e
.strerror
)
4489 raise XAttrMetadataError(returncode
, stderr
)
4492 def random_birthday(year_field
, month_field
, day_field
):
4493 start_date
= datetime
.date(1950, 1, 1)
4494 end_date
= datetime
.date(1995, 12, 31)
4495 offset
= random
.randint(0, (end_date
- start_date
).days
)
4496 random_date
= start_date
+ datetime
.timedelta(offset
)
4498 year_field
: str(random_date
.year
),
4499 month_field
: str(random_date
.month
),
4500 day_field
: str(random_date
.day
),
4504 def find_available_port(interface
=''):
4506 with socket
.socket() as sock
:
4507 sock
.bind((interface
, 0))
4508 return sock
.getsockname()[1]
4513 # Templates for internet shortcut files, which are plain text files.
4514 DOT_URL_LINK_TEMPLATE
= '''\
4519 DOT_WEBLOC_LINK_TEMPLATE
= '''\
4520 <?xml version="1.0" encoding="UTF-8"?>
4521 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4522 <plist version="1.0">
4525 \t<string>%(url)s</string>
4530 DOT_DESKTOP_LINK_TEMPLATE
= '''\
4540 'url': DOT_URL_LINK_TEMPLATE
,
4541 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
4542 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
4546 def iri_to_uri(iri
):
4548 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4550 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
4553 iri_parts
= urllib
.parse
.urlparse(iri
)
4555 if '[' in iri_parts
.netloc
:
4556 raise ValueError('IPv6 URIs are not, yet, supported.')
4557 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4559 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4562 if iri_parts
.username
:
4563 net_location
+= urllib
.parse
.quote(iri_parts
.username
, safe
=r
"!$%&'()*+,~")
4564 if iri_parts
.password
is not None:
4565 net_location
+= ':' + urllib
.parse
.quote(iri_parts
.password
, safe
=r
"!$%&'()*+,~")
4568 net_location
+= iri_parts
.hostname
.encode('idna').decode() # Punycode for Unicode hostnames.
4569 # The 'idna' encoding produces ASCII text.
4570 if iri_parts
.port
is not None and iri_parts
.port
!= 80:
4571 net_location
+= ':' + str(iri_parts
.port
)
4573 return urllib
.parse
.urlunparse(
4577 urllib
.parse
.quote_plus(iri_parts
.path
, safe
=r
"!$%&'()*+,/:;=@|~"),
4579 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4580 urllib
.parse
.quote_plus(iri_parts
.params
, safe
=r
"!$%&'()*+,/:;=@|~"),
4582 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4583 urllib
.parse
.quote_plus(iri_parts
.query
, safe
=r
"!$%&'()*+,/:;=?@{|}~"),
4585 urllib
.parse
.quote_plus(iri_parts
.fragment
, safe
=r
"!#$%&'()*+,/:;=?@{|}~")))
4587 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
4590 def to_high_limit_path(path
):
4591 if sys
.platform
in ['win32', 'cygwin']:
4592 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
4593 return '\\\\?\\' + os
.path
.abspath(path
)
4598 def format_field(obj
, field
=None, template
='%s', ignore
=NO_DEFAULT
, default
='', func
=IDENTITY
):
4599 val
= traversal
.traverse_obj(obj
, *variadic(field
))
4600 if not val
if ignore
is NO_DEFAULT
else val
in variadic(ignore
):
4602 return template
% func(val
)
4605 def clean_podcast_url(url
):
4606 url
= re
.sub(r
'''(?x)
4610 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4615 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4618 cn\.co| # https://podcorn.com/analytics-prefix/
4619 st\.fm # https://podsights.com/docs/
4624 return re
.sub(r
'^\w+://(\w+://)', r
'\1', url
)
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Generate a random version 4 UUID string (lowercase, hyphenated)

    In the template, `x` is replaced by any hex digit and `y` by one of
    `8`, `9`, `a`, `b`, so that the variant bits are `10` as RFC 4122 requires.
    """
    def _random_digit(mobj):
        if mobj.group(0) == 'y':
            return '89ab'[random.randint(0, 3)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """
    Create the directory that contains `path`, including missing ancestors

    @param path         path of a file whose parent directory should exist
    @param to_screen    optional callable that is given a message if creation fails
    @returns            True on success (or if `path` has no directory part), False on OSError
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # `callable()` returns a bool, so comparing its result against None was
        # always true and raised TypeError when no reporter was given
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
def get_executable_path():
    """Return the absolute directory of the path reported by `_get_variant_and_executable_path`"""
    # Imported here rather than at module level
    from ..update import _get_variant_and_executable_path

    executable = _get_variant_and_executable_path()[1]
    return os.path.dirname(os.path.abspath(executable))
def get_user_config_dirs(package_name):
    """Yield the candidate per-user configuration directories for `package_name`

    The directories are yielded in this order and are not checked for existence:
    the XDG config directory, the %APPDATA% directory (only when the
    environment variable is set) and a dot-directory in the user's home.
    """
    # .config (e.g. ~/.config/package_name)
    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    yield os.path.join(xdg_config_home, package_name)

    # appdata (%APPDATA%/package_name)
    appdata_dir = os.getenv('appdata')
    if appdata_dir:
        yield os.path.join(appdata_dir, package_name)

    # home (~/.package_name)
    yield os.path.join(compat_expanduser('~'), f'.{package_name}')
def get_system_config_dirs(package_name):
    """Yield the candidate system-wide configuration directories for `package_name`"""
    # /etc/package_name
    yield os.path.join('/etc', package_name)
def time_seconds(**kwargs):
    """
    Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)

    The keyword arguments are passed to `datetime.timedelta` and the resulting
    offset (in seconds) is added to the current time.
    """
    return time.time() + datetime.timedelta(**kwargs).total_seconds()
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Encode `payload_data` as a HS256-signed token

    @param payload_data  JSON-serializable payload
    @param key           Secret used for the HMAC-SHA256 signature (str)
    @param headers       Optional mapping of extra/overriding header fields
    @returns             The token as bytes: header.payload.signature
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    # NOTE(review): the segments use the standard base64 alphabet with padding,
    # whereas RFC 7515 specifies unpadded base64url. Kept as-is since callers
    # may depend on the exact output - confirm before changing
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
    return token
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Return the decoded payload of a three-part token

    The signature is NOT verified and the header is not inspected.
    Raises ValueError if `jwt` does not consist of exactly three
    dot-separated segments.
    """
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
    return payload_data
# False until VT mode has been enabled on Windows; None on other systems
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None


@functools.cache
def supports_terminal_sequences(stream):
    """Whether terminal escape sequences may be written to `stream`

    On Windows this requires WINDOWS_VT_MODE to be set; elsewhere the TERM
    environment variable must be non-empty. In both cases the stream must
    also report itself as a tty. The result is cached per stream.
    """
    if compat_os_name == 'nt':
        if not WINDOWS_VT_MODE:
            return False
    elif not os.getenv('TERM'):
        return False
    try:
        return stream.isatty()
    # NOTE(review): BaseException also swallows KeyboardInterrupt/SystemExit -
    # confirm that this breadth is intended
    except BaseException:
        return False
def windows_enable_vt_mode():
    """Ref: https://bugs.python.org/issue30075 """
    # Nothing is attempted on Windows builds older than 10.0.10586
    if get_windows_version() < (10, 0, 10586):
        return

    import ctypes
    import ctypes.wintypes
    import msvcrt

    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

    dll = ctypes.WinDLL('kernel32', use_last_error=False)
    handle = os.open('CONOUT$', os.O_RDWR)
    try:
        h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
        dw_original_mode = ctypes.wintypes.DWORD()
        success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
        if not success:
            raise Exception('GetConsoleMode failed')

        # Add the VT flag on top of whatever mode is currently set
        success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
            dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
        if not success:
            raise Exception('SetConsoleMode failed')
    finally:
        # Do not leak the console file descriptor, even on failure
        os.close(handle)

    global WINDOWS_VT_MODE
    WINDOWS_VT_MODE = True
    # Results cached before VT mode was enabled are no longer valid
    supports_terminal_sequences.cache_clear()
# ESC, "[", one or more characters other than "m", then "m"
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with all matches of _terminal_sequences_re removed"""
    return _terminal_sequences_re.sub('', string)
def number_of_digits(number):
    """Return the length of `number` formatted with '%d' (a minus sign is counted)"""
    return len('%d' % number)
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the truthy `values` with `delim` after converting them to str

    @param delim      Separator placed between the values
    @param from_dict  If given, each value is instead treated as a path
                      and looked up in this object using traverse_obj
    """
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
    # filter(None, ...) drops every falsy value, including 0 and ''
    return delim.join(map(str, filter(None, values)))
4770 def scale_thumbnails_to_max_format_width(formats
, thumbnails
, url_width_re
):
4772 Find the largest format dimensions in terms of video width and, for each thumbnail:
4773 * Modify the URL: Match the width with the provided regex and replace with the former width
4776 This function is useful with video services that scale the provided thumbnails on demand
4778 _keys
= ('width', 'height')
4779 max_dimensions
= max(
4780 (tuple(format
.get(k
) or 0 for k
in _keys
) for format
in formats
),
4782 if not max_dimensions
[0]:
4786 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}
,
4787 dict(zip(_keys
, max_dimensions
)), thumbnail
)
4788 for thumbnail
in thumbnails
def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
    if not range:
        return None, None, None
    # "bytes" followed by a space or "=", then start, optional end and optional "/total"
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if not crg:
        return None, None, None
    return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
4802 def read_stdin(what
):
4804 eof
= 'Ctrl+Z' if compat_os_name
== 'nt' else 'Ctrl+D'
4805 write_string(f
'Reading {what} from STDIN - EOF ({eof}) to end:\n')
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @param data  The leading bytes of the file
    @returns (encoding, bytes to skip)
             encoding is None when nothing could be detected
    """

    # BOM marks are given priority over declarations
    for bom, enc in BOMS:
        if data.startswith(bom):
            return enc, len(bom)

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    data = data.replace(b'\0', b'')
    # re.match anchors at the start of data, so only a declaration
    # on the very first line is recognized
    mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
    return mobj.group(1).decode() if mobj else None, 0
4831 __initialized
= False
4833 def __init__(self
, parser
, label
=None):
4834 self
.parser
, self
.label
= parser
, label
4835 self
._loaded
_paths
, self
.configs
= set(), []
4837 def init(self
, args
=None, filename
=None):
4838 assert not self
.__initialized
4839 self
.own_args
, self
.filename
= args
, filename
4840 return self
.load_configs()
4842 def load_configs(self
):
4845 location
= os
.path
.realpath(self
.filename
)
4846 directory
= os
.path
.dirname(location
)
4847 if location
in self
._loaded
_paths
:
4849 self
._loaded
_paths
.add(location
)
4851 self
.__initialized
= True
4852 opts
, _
= self
.parser
.parse_known_args(self
.own_args
)
4853 self
.parsed_args
= self
.own_args
4854 for location
in opts
.config_locations
or []:
4856 if location
in self
._loaded
_paths
:
4858 self
._loaded
_paths
.add(location
)
4859 self
.append_config(shlex
.split(read_stdin('options'), comments
=True), label
='stdin')
4861 location
= os
.path
.join(directory
, expand_path(location
))
4862 if os
.path
.isdir(location
):
4863 location
= os
.path
.join(location
, 'yt-dlp.conf')
4864 if not os
.path
.exists(location
):
4865 self
.parser
.error(f
'config location {location} does not exist')
4866 self
.append_config(self
.read_file(location
), location
)
4870 label
= join_nonempty(
4871 self
.label
, 'config', f
'"{self.filename}"' if self
.filename
else '',
4873 return join_nonempty(
4874 self
.own_args
is not None and f
'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
4875 *(f
'\n{c}'.replace('\n', '\n| ')[1:] for c
in self
.configs
),
4879 def read_file(filename
, default
=[]):
4881 optionf
= open(filename
, 'rb')
4883 return default
# silently skip if file is not present
4885 enc
, skip
= determine_file_encoding(optionf
.read(512))
4886 optionf
.seek(skip
, io
.SEEK_SET
)
4888 enc
= None # silently skip read errors
4890 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
4891 contents
= optionf
.read().decode(enc
or preferredencoding())
4892 res
= shlex
.split(contents
, comments
=True)
4893 except Exception as err
:
4894 raise ValueError(f
'Unable to parse "{filename}": {err}')
4900 def hide_login_info(opts
):
4901 PRIVATE_OPTS
= {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
4902 eqre
= re
.compile('^(?P<key>' + ('|'.join(re
.escape(po
) for po
in PRIVATE_OPTS
)) + ')=.+$')
4907 return m
.group('key') + '=PRIVATE'
4911 opts
= list(map(_scrub_eq
, opts
))
4912 for idx
, opt
in enumerate(opts
):
4913 if opt
in PRIVATE_OPTS
and idx
+ 1 < len(opts
):
4914 opts
[idx
+ 1] = 'PRIVATE'
4917 def append_config(self
, *args
, label
=None):
4918 config
= type(self
)(self
.parser
, label
)
4919 config
._loaded
_paths
= self
._loaded
_paths
4920 if config
.init(*args
):
4921 self
.configs
.append(config
)
4925 for config
in reversed(self
.configs
):
4926 yield from config
.all_args
4927 yield from self
.parsed_args
or []
4929 def parse_known_args(self
, **kwargs
):
4930 return self
.parser
.parse_known_args(self
.all_args
, **kwargs
)
4932 def parse_args(self
):
4933 return self
.parser
.parse_args(self
.all_args
)
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    # Keys are normalized with str.title(), so differently-cased duplicates collapse
    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
def cached_method(f):
    """Cache a method"""
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        # Bind the call to the signature so that positional, keyword and
        # defaulted arguments all produce the same cache key
        bound_args = signature.bind(self, *args, **kwargs)
        bound_args.apply_defaults()
        # [1:] drops `self`; the cache already lives on the instance
        key = tuple(bound_args.arguments.values())[1:]

        cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
        if key not in cache:
            cache[key] = f(self, *args, **kwargs)
        return cache[key]
    return wrapper
class classproperty:
    """property access for class methods with optional caching"""
    def __new__(cls, func=None, *args, **kwargs):
        # Used with arguments only, e.g. @classproperty(cache=True):
        # return a partial that will receive the function afterwards
        if not func:
            return functools.partial(cls, *args, **kwargs)
        return super().__new__(cls)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func
        # Maps each accessing class to its computed value; None disables caching
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        if self._cache is None:
            return self.func(cls)
        elif cls not in self._cache:
            self._cache[cls] = self.func(cls)
        return self._cache[cls]
class function_with_repr:
    """Callable wrapper around a function that provides a custom repr()

    Without an explicit `repr_`, the repr is '<module>.<qualname>' of the function.
    """
    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func, self.__repr = func, repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    @classmethod
    def set_repr(cls, repr_):
        """Return a decorator that wraps a function with the given repr"""
        return functools.partial(cls, repr_=repr_)

    def __repr__(self):
        if self.__repr:
            return self.__repr
        return f'{self.func.__module__}.{self.func.__qualname__}'
4996 class Namespace(types
.SimpleNamespace
):
4997 """Immutable namespace"""
5000 return iter(self
.__dict
__.values())
5004 return self
.__dict
__.items()
# Known file extensions grouped by media type
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# The full video/audio lists also include the common extensions
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5025 for retry in RetryManager(...):
5028 except SomeException as err:
5032 attempt
, _error
= 0, None
5034 def __init__(self
, _retries
, _error_callback
, **kwargs
):
5035 self
.retries
= _retries
or 0
5036 self
.error_callback
= functools
.partial(_error_callback
, **kwargs
)
5038 def _should_retry(self
):
5039 return self
._error
is not NO_DEFAULT
and self
.attempt
<= self
.retries
5043 if self
._error
is NO_DEFAULT
:
5048 def error(self
, value
):
5052 while self
._should
_retry
():
5053 self
.error
= NO_DEFAULT
5057 self
.error_callback(self
.error
, self
.attempt
, self
.retries
)
5060 def report_retry(e
, count
, retries
, *, sleep_func
, info
, warn
, error
=None, suffix
=None):
5061 """Utility function for reporting retries"""
5064 return error(f
'{e}. Giving up after {count - 1} retries') if count
> 1 else error(str(e
))
5069 elif isinstance(e
, ExtractorError
):
5070 e
= remove_end(str_or_none(e
.cause
) or e
.orig_msg
, '.')
5071 warn(f
'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5073 delay
= float_or_none(sleep_func(n
=count
- 1)) if callable(sleep_func
) else sleep_func
5075 info(f
'Sleeping {delay:.2f} seconds ...')
def make_archive_id(ie, video_id):
    """Return the archive id: the lowercased extractor key and `video_id`, separated by a space

    @param ie  Either the extractor key as a str, or an object providing ie_key()
    """
    ie_key = ie if isinstance(ie, str) else ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
def truncate_string(s, left, right=0):
    """Shorten `s` to `left + right` characters, marking the cut with '...'

    @param left   Length of the leading part, including the 3-character ellipsis
    @param right  Number of trailing characters to keep
    @returns      `s` unchanged if it is None or already short enough
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    # s[-0:] would be the whole string, hence the explicit check on `right`
    return f'{s[:left - 3]}...{s[-right:] if right else ""}'
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Build an ordered set of items from a sequence of option strings

    @param options     Option strings; a leading '-' removes instead of adds
    @param alias_dict  Maps alias names to lists of options. The 'all' alias
                       is required and lists every valid item
    @param use_regex   Treat each option as a case-insensitive regex that
                       must fully match items of alias_dict['all']
    @param start       Items to start from
    Raises ValueError for an option that is neither an alias nor a valid item
    """
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for val in options:
        discard = val.startswith('-')
        if discard:
            val = val[1:]

        if val in alias_dict:
            # A discarded alias is expanded with the sign of each member flipped
            val = alias_dict[val] if not discard else [
                i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(val, alias_dict, start=requested)
            continue

        current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
                   else [val] if val in alias_dict['all'] else None)
        if current is None:
            raise ValueError(val)

        if discard:
            for item in current:
                while item in requested:
                    requested.remove(item)
        else:
            requested.extend(current)

    return orderedSet(requested)
5123 regex
= r
' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
5125 default
= ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
5126 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
5127 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
5128 ytdl_default
= ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
5129 'height', 'width', 'proto', 'vext', 'abr', 'aext',
5130 'fps', 'fs_approx', 'source', 'id')
5133 'vcodec': {'type': 'ordered', 'regex': True,
5134 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
5135 'acodec': {'type': 'ordered', 'regex': True,
5136 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
5137 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
5138 'order': ['dv', '(hdr)?12', r
'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
5139 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
5140 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
5141 'vext': {'type': 'ordered', 'field': 'video_ext',
5142 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
5143 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
5144 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
5145 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
5146 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
5147 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}
,
5148 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
5149 'field': ('vcodec', 'acodec'),
5150 'function': lambda it
: int(any(v
!= 'none' for v
in it
))},
5151 'ie_pref': {'priority': True, 'type': 'extractor'}
,
5152 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}
,
5153 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}
,
5154 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}
,
5155 'quality': {'convert': 'float', 'default': -1}
,
5156 'filesize': {'convert': 'bytes'}
,
5157 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}
,
5158 'id': {'convert': 'string', 'field': 'format_id'}
,
5159 'height': {'convert': 'float_none'}
,
5160 'width': {'convert': 'float_none'}
,
5161 'fps': {'convert': 'float_none'}
,
5162 'channels': {'convert': 'float_none', 'field': 'audio_channels'}
,
5163 'tbr': {'convert': 'float_none'}
,
5164 'vbr': {'convert': 'float_none'}
,
5165 'abr': {'convert': 'float_none'}
,
5166 'asr': {'convert': 'float_none'}
,
5167 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}
,
5169 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}
,
5170 'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
5171 'function': lambda it
: next(filter(None, it
), None)},
5172 'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
5173 'function': lambda it
: next(filter(None, it
), None)},
5174 'ext': {'type': 'combined', 'field': ('vext', 'aext')}
,
5175 'res': {'type': 'multiple', 'field': ('height', 'width'),
5176 'function': lambda it
: (lambda l
: min(l
) if l
else 0)(tuple(filter(None, it
)))},
5178 # Actual field names
5179 'format_id': {'type': 'alias', 'field': 'id'}
,
5180 'preference': {'type': 'alias', 'field': 'ie_pref'}
,
5181 'language_preference': {'type': 'alias', 'field': 'lang'}
,
5182 'source_preference': {'type': 'alias', 'field': 'source'}
,
5183 'protocol': {'type': 'alias', 'field': 'proto'}
,
5184 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}
,
5185 'audio_channels': {'type': 'alias', 'field': 'channels'}
,
5188 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}
,
5189 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}
,
5190 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}
,
5191 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}
,
5192 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}
,
5193 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}
,
5194 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}
,
5195 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}
,
5196 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}
,
5197 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}
,
5198 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}
,
5199 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}
,
5200 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}
,
5201 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}
,
5202 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}
,
5203 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}
,
5204 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}
,
5205 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}
,
5206 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}
,
5207 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}
,
5210 def __init__(self
, ydl
, field_preference
):
5213 self
.evaluate_params(self
.ydl
.params
, field_preference
)
5214 if ydl
.params
.get('verbose'):
5215 self
.print_verbose_info(self
.ydl
.write_debug
)
5217 def _get_field_setting(self
, field
, key
):
5218 if field
not in self
.settings
:
5219 if key
in ('forced', 'priority'):
5221 self
.ydl
.deprecated_feature(f
'Using arbitrary fields ({field}) for format sorting is '
5222 'deprecated and may be removed in a future version')
5223 self
.settings
[field
] = {}
5224 propObj
= self
.settings
[field
]
5225 if key
not in propObj
:
5226 type = propObj
.get('type')
5228 default
= 'preference' if type == 'extractor' else (field
,) if type in ('combined', 'multiple') else field
5229 elif key
== 'convert':
5230 default
= 'order' if type == 'ordered' else 'float_string' if field
else 'ignore'
5232 default
= {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}
.get(key
, None)
5233 propObj
[key
] = default
5236 def _resolve_field_value(self
, field
, value
, convertNone
=False):
5241 value
= value
.lower()
5242 conversion
= self
._get
_field
_setting
(field
, 'convert')
5243 if conversion
== 'ignore':
5245 if conversion
== 'string':
5247 elif conversion
== 'float_none':
5248 return float_or_none(value
)
5249 elif conversion
== 'bytes':
5250 return parse_bytes(value
)
5251 elif conversion
== 'order':
5252 order_list
= (self
._use
_free
_order
and self
._get
_field
_setting
(field
, 'order_free')) or self
._get
_field
_setting
(field
, 'order')
5253 use_regex
= self
._get
_field
_setting
(field
, 'regex')
5254 list_length
= len(order_list
)
5255 empty_pos
= order_list
.index('') if '' in order_list
else list_length
+ 1
5256 if use_regex
and value
is not None:
5257 for i
, regex
in enumerate(order_list
):
5258 if regex
and re
.match(regex
, value
):
5259 return list_length
- i
5260 return list_length
- empty_pos
# not in list
5261 else: # not regex or value = None
5262 return list_length
- (order_list
.index(value
) if value
in order_list
else empty_pos
)
5264 if value
.isnumeric():
5267 self
.settings
[field
]['convert'] = 'string'
5270 def evaluate_params(self
, params
, sort_extractor
):
5271 self
._use
_free
_order
= params
.get('prefer_free_formats', False)
5272 self
._sort
_user
= params
.get('format_sort', [])
5273 self
._sort
_extractor
= sort_extractor
5275 def add_item(field
, reverse
, closest
, limit_text
):
5276 field
= field
.lower()
5277 if field
in self
._order
:
5279 self
._order
.append(field
)
5280 limit
= self
._resolve
_field
_value
(field
, limit_text
)
5283 'closest': False if limit
is None else closest
,
5284 'limit_text': limit_text
,
5286 if field
in self
.settings
:
5287 self
.settings
[field
].update(data
)
5289 self
.settings
[field
] = data
5292 tuple(field
for field
in self
.default
if self
._get
_field
_setting
(field
, 'forced'))
5293 + (tuple() if params
.get('format_sort_force', False)
5294 else tuple(field
for field
in self
.default
if self
._get
_field
_setting
(field
, 'priority')))
5295 + tuple(self
._sort
_user
) + tuple(sort_extractor
) + self
.default
)
5297 for item
in sort_list
:
5298 match
= re
.match(self
.regex
, item
)
5300 raise ExtractorError('Invalid format sort string "%s" given by extractor' % item
)
5301 field
= match
.group('field')
5304 if self
._get
_field
_setting
(field
, 'type') == 'alias':
5305 alias
, field
= field
, self
._get
_field
_setting
(field
, 'field')
5306 if self
._get
_field
_setting
(alias
, 'deprecated'):
5307 self
.ydl
.deprecated_feature(f
'Format sorting alias {alias} is deprecated and may '
5308 f
'be removed in a future version. Please use {field} instead')
5309 reverse
= match
.group('reverse') is not None
5310 closest
= match
.group('separator') == '~'
5311 limit_text
= match
.group('limit')
5313 has_limit
= limit_text
is not None
5314 has_multiple_fields
= self
._get
_field
_setting
(field
, 'type') == 'combined'
5315 has_multiple_limits
= has_limit
and has_multiple_fields
and not self
._get
_field
_setting
(field
, 'same_limit')
5317 fields
= self
._get
_field
_setting
(field
, 'field') if has_multiple_fields
else (field
,)
5318 limits
= limit_text
.split(':') if has_multiple_limits
else (limit_text
,) if has_limit
else tuple()
5319 limit_count
= len(limits
)
5320 for (i
, f
) in enumerate(fields
):
5321 add_item(f
, reverse
, closest
,
5322 limits
[i
] if i
< limit_count
5323 else limits
[0] if has_limit
and not has_multiple_limits
5326 def print_verbose_info(self
, write_debug
):
5328 write_debug('Sort order given by user: %s' % ', '.join(self
._sort
_user
))
5329 if self
._sort
_extractor
:
5330 write_debug('Sort order given by extractor: %s' % ', '.join(self
._sort
_extractor
))
5331 write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
5332 '+' if self
._get
_field
_setting
(field
, 'reverse') else '', field
,
5333 '%s%s(%s)' % ('~' if self
._get
_field
_setting
(field
, 'closest') else ':',
5334 self
._get
_field
_setting
(field
, 'limit_text'),
5335 self
._get
_field
_setting
(field
, 'limit'))
5336 if self
._get
_field
_setting
(field
, 'limit_text') is not None else '')
5337 for field
in self
._order
if self
._get
_field
_setting
(field
, 'visible')]))
5339 def _calculate_field_preference_from_value(self
, format
, field
, type, value
):
5340 reverse
= self
._get
_field
_setting
(field
, 'reverse')
5341 closest
= self
._get
_field
_setting
(field
, 'closest')
5342 limit
= self
._get
_field
_setting
(field
, 'limit')
5344 if type == 'extractor':
5345 maximum
= self
._get
_field
_setting
(field
, 'max')
5346 if value
is None or (maximum
is not None and value
>= maximum
):
5348 elif type == 'boolean':
5349 in_list
= self
._get
_field
_setting
(field
, 'in_list')
5350 not_in_list
= self
._get
_field
_setting
(field
, 'not_in_list')
5351 value
= 0 if ((in_list
is None or value
in in_list
) and (not_in_list
is None or value
not in not_in_list
)) else -1
5352 elif type == 'ordered':
5353 value
= self
._resolve
_field
_value
(field
, value
, True)
5355 # try to convert to number
5356 val_num
= float_or_none(value
, default
=self
._get
_field
_setting
(field
, 'default'))
5357 is_num
= self
._get
_field
_setting
(field
, 'convert') != 'string' and val_num
is not None
5361 return ((-10, 0) if value
is None
5362 else (1, value
, 0) if not is_num
# if a field has mixed strings and numbers, strings are sorted higher
5363 else (0, -abs(value
- limit
), value
- limit
if reverse
else limit
- value
) if closest
5364 else (0, value
, 0) if not reverse
and (limit
is None or value
<= limit
)
5365 else (0, -value
, 0) if limit
is None or (reverse
and value
== limit
) or value
> limit
5366 else (-1, value
, 0))
5368 def _calculate_field_preference(self
, format
, field
):
5369 type = self
._get
_field
_setting
(field
, 'type') # extractor, boolean, ordered, field, multiple
5370 get_value
= lambda f
: format
.get(self
._get
_field
_setting
(f
, 'field'))
5371 if type == 'multiple':
5372 type = 'field' # Only 'field' is allowed in multiple for now
5373 actual_fields
= self
._get
_field
_setting
(field
, 'field')
5375 value
= self
._get
_field
_setting
(field
, 'function')(get_value(f
) for f
in actual_fields
)
5377 value
= get_value(field
)
5378 return self
._calculate
_field
_preference
_from
_value
(format
, field
, type, value
)
5380 def calculate_preference(self
, format
):
5381 # Determine missing protocol
5382 if not format
.get('protocol'):
5383 format
['protocol'] = determine_protocol(format
)
5385 # Determine missing ext
5386 if not format
.get('ext') and 'url' in format
:
5387 format
['ext'] = determine_ext(format
['url'])
5388 if format
.get('vcodec') == 'none':
5389 format
['audio_ext'] = format
['ext'] if format
.get('acodec') != 'none' else 'none'
5390 format
['video_ext'] = 'none'
5392 format
['video_ext'] = format
['ext']
5393 format
['audio_ext'] = 'none'
5394 # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
5395 # format['preference'] = -1000
5397 if format
.get('preference') is None and format
.get('ext') == 'flv' and re
.match('[hx]265|he?vc?', format
.get('vcodec') or ''):
5398 # HEVC-over-FLV is out-of-spec by FLV's original spec
5399 # ref. https://trac.ffmpeg.org/ticket/6389
5400 # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
5401 format
['preference'] = -100
5403 # Determine missing bitrates
5404 if format
.get('vcodec') == 'none':
5406 if format
.get('acodec') == 'none':
5408 if not format
.get('vbr') and format
.get('vcodec') != 'none':
5409 format
['vbr'] = try_call(lambda: format
['tbr'] - format
['abr']) or None
5410 if not format
.get('abr') and format
.get('acodec') != 'none':
5411 format
['abr'] = try_call(lambda: format
['tbr'] - format
['vbr']) or None
5412 if not format
.get('tbr'):
5413 format
['tbr'] = try_call(lambda: format
['vbr'] + format
['abr']) or None
5415 return tuple(self
._calculate
_field
_preference
(format
, field
) for field
in self
._order
)
5420 def __init__(self
, ydl
=None):
5423 def debug(self
, message
):
5425 self
._ydl
.write_debug(message
)
5427 def info(self
, message
):
5429 self
._ydl
.to_screen(message
)
5431 def warning(self
, message
, *, once
=False):
5433 self
._ydl
.report_warning(message
, once
)
5435 def error(self
, message
, *, is_error
=True):
5437 self
._ydl
.report_error(message
, is_error
=is_error
)
5439 def stdout(self
, message
):
5441 self
._ydl
.to_stdout(message
)
5443 def stderr(self
, message
):
5445 self
._ydl
.to_stderr(message
)