47 import xml
.etree
.ElementTree
50 from . import traversal
52 from ..compat
import functools
# isort: split
53 from ..compat
import (
54 compat_etree_fromstring
,
56 compat_HTMLParseError
,
60 from ..dependencies
import brotli
, certifi
, websockets
, xattr
61 from ..socks
import ProxyType
, sockssocket
# Drop the last dotted component of this module's name so that it reports
# the name of the enclosing package instead of its own
__name__ = __name__.rsplit('.', 1)[0]  # Pretend to be the parent module

# This is not clearly defined otherwise
# (it is the type of the object returned by re.compile)
compiled_regex_type = type(re.compile(''))
69 def random_user_agent():
70 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
111 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
114 SUPPORTED_ENCODINGS
= [
118 SUPPORTED_ENCODINGS
.append('br')
121 'User-Agent': random_user_agent(),
122 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
123 'Accept-Language': 'en-us,en;q=0.5',
124 'Sec-Fetch-Mode': 'navigate',
129 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Unique sentinel used as the default of "default" parameters, so that an
# explicitly passed None can be told apart from "no default given"
NO_DEFAULT = object()
# Function that returns its single argument unchanged
IDENTITY = lambda x: x
# Full English month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
141 'en': ENGLISH_MONTH_NAMES
,
143 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
144 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
145 # these follow the genitive grammatical case (dopełniacz)
146 # some websites might be using nominative, which will require another month list
147 # https://en.wikibooks.org/wiki/Polish/Noun_cases
148 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
149 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
152 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
154 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
155 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
156 'EST': -5, 'EDT': -4, # Eastern
157 'CST': -6, 'CDT': -5, # Central
158 'MST': -7, 'MDT': -6, # Mountain
159 'PST': -8, 'PDT': -7 # Pacific
# needed for sanitizing filenames in restricted mode
# Maps every accented character of the key string to its plain replacement.
# Replacements longer than one character are wrapped in tuples so that
# itertools.chain yields them whole instead of character by character.
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        # replacements for the first half of the key string ('Â' .. 'ß')
        'AAAAAA', ('AE',), 'CEEEEIIIIDNOOOOOOO', ('OE',), 'UUUUUY', ('TH', 'ss'),
        # replacements for the second half of the key string ('à' .. 'ÿ')
        'aaaaaa', ('ae',), 'ceeeeiiiionooooooo', ('oe',), 'uuuuuy', ('th',), 'y',
    ),
))
195 '%Y-%m-%d %H:%M:%S.%f',
196 '%Y-%m-%d %H:%M:%S:%f',
199 '%Y-%m-%dT%H:%M:%SZ',
200 '%Y-%m-%dT%H:%M:%S.%fZ',
201 '%Y-%m-%dT%H:%M:%S.%f0Z',
203 '%Y-%m-%dT%H:%M:%S.%f',
206 '%b %d %Y at %H:%M:%S',
208 '%B %d %Y at %H:%M:%S',
212 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
213 DATE_FORMATS_DAY_FIRST
.extend([
223 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
224 DATE_FORMATS_MONTH_FIRST
.extend([
# Matches text of the form }('<1>',<2>,<3>,'<4>'.split('|') and captures
# <1> (any text), <2> and <3> (digit runs) and <4> (text without single quotes)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type is application/ld+json (optionally
# quoted; case-insensitive, "." also matches newlines) and captures the
# enclosed {...} object or [...] array as the named group "json_ld"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

# Matches a run of digits with an optional ".digits" fractional part
NUMBER_RE = r'\d+(?:\.\d+)?'
239 def preferredencoding():
240 """Get preferred encoding.
242 Returns the best encoding scheme for the system, based on
243 locale.getpreferredencoding() and some further tweaks.
246 pref = locale.getpreferredencoding()
254 def write_json_file(obj, fn):
255 """ Encode obj as JSON and write it to fn, atomically if possible """
257 tf = tempfile.NamedTemporaryFile(
258 prefix=f'{os.path.basename(fn)}
.', dir=os.path.dirname(fn),
259 suffix='.tmp
', delete=False, mode='w
', encoding='utf
-8')
263 json.dump(obj, tf, ensure_ascii=False)
264 if sys.platform == 'win32
':
265 # Need to remove existing file on Windows, else os.rename raises
266 # WindowsError or FileExistsError.
267 with contextlib.suppress(OSError):
269 with contextlib.suppress(OSError):
272 os.chmod(tf.name, 0o666 & ~mask)
273 os.rename(tf.name, fn)
275 with contextlib.suppress(OSError):
def find_xpath_attr(node, xpath, key, val=None):
    """ Find the first element matching xpath[@key], or xpath[@key='val'] if val is given """
    # key is spliced into the expression verbatim, so only plain names are accepted
    assert re.match(r'^[a-zA-Z_-]+$', key)
    if val is None:
        predicate = '[@%s]' % key
    else:
        predicate = f"[@{key}='{val}']"
    return node.find(xpath + predicate)
286 # On python2.6 the xml.etree.ElementTree.Element methods don't support
287 # the namespace parameter
290 def xpath_with_ns(path
, ns_map
):
291 components
= [c
.split(':') for c
in path
.split('/')]
295 replaced
.append(c
[0])
298 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
299 return '/'.join(replaced
)
302 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
303 def _find_xpath(xpath
):
304 return node
.find(xpath
)
306 if isinstance(xpath
, str):
307 n
= _find_xpath(xpath
)
315 if default
is not NO_DEFAULT
:
318 name
= xpath
if name
is None else name
319 raise ExtractorError('Could not find XML element %s' % name
)
325 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
326 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
327 if n
is None or n
== default
:
330 if default
is not NO_DEFAULT
:
333 name
= xpath
if name
is None else name
334 raise ExtractorError('Could not find XML element\'s text %s' % name
)
340 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
341 n
= find_xpath_attr(node
, xpath
, key
)
343 if default
is not NO_DEFAULT
:
346 name
= f
'{xpath}[@{key}]' if name
is None else name
347 raise ExtractorError('Could not find XML attribute %s' % name
)
def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag whose "id" attribute is the specified ID in the passed HTML document"""
    # Thin wrapper: an ID lookup is an attribute lookup on "id"
    content = get_element_by_attribute('id', id, html, **kwargs)
    return content
def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag whose "id" attribute is the specified ID in the passed HTML document"""
    # Thin wrapper: an ID lookup is an attribute lookup on "id"
    whole = get_element_html_by_attribute('id', id, html, **kwargs)
    return whole
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document, or None"""
    matches = get_elements_html_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag with the specified attribute value in the passed HTML document, or None"""
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    if not matches:
        return None
    return matches[0]
def get_element_html_by_attribute(attribute, value, html, **kargs):
    """Return the html of the first tag with the specified attribute value in the passed HTML document, or None"""
    matches = get_elements_html_by_attribute(attribute, value, html, **kargs)
    if not matches:
        return None
    return matches[0]
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may hold several names, so match class_name only
    # when it is delimited by a quote or whitespace on both sides
    # NOTE: **kargs is accepted but not forwarded to the attribute lookup
    class_value_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may hold several names, so match class_name only
    # when it is delimited by a quote or whitespace on both sides
    class_value_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_html_by_attribute('class', class_value_re, html, escape_value=False)
def get_elements_by_attribute(*args, **kwargs):
    """Return the content of every tag with the specified attribute in the passed HTML document as a list"""
    # Each match is a (content, whole) pair; keep only the first member
    pairs = get_elements_text_and_html_by_attribute(*args, **kwargs)
    return [text for text, _whole in pairs]
def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of every tag with the specified attribute in the passed HTML document as a list"""
    # Each match is a (content, whole) pair; keep only the second member
    pairs = get_elements_text_and_html_by_attribute(*args, **kwargs)
    return [outer for _text, outer in pairs]
409 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w
:.-]+', escape_value=True):
411 Return the text (content) and the html (whole) of the tag with the specified
412 attribute in the passed HTML document
417 quote = '' if re.match(r'''[\s"'`
=<>]''', value) else '?'
419 value = re.escape(value) if escape_value else value
421 partial_element_re = rf'''(?x
)
423 (?
:\
s(?
:[^
>"']|"[^
"]*"|
'[^']*')*)?
424 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
427 for m in re.finditer(partial_element_re, html):
428 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
431 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P
<content
>.*)(?P
=q
)$
', r'\g
<content
>', content, flags=re.DOTALL)),
436 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
438 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
439 closing tag for the first opening tag it has encountered, and can be used
443 class HTMLBreakOnClosingTagException(Exception):
447 self.tagstack = collections.deque()
448 html.parser.HTMLParser.__init__(self)
453 def __exit__(self, *_):
457 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
458 # so data remains buffered; we no longer have any interest in it, thus
459 # override this method to discard it
462 def handle_starttag(self, tag, _):
463 self.tagstack.append(tag)
465 def handle_endtag(self, tag):
466 if not self.tagstack:
467 raise compat_HTMLParseError('no tags
in the stack
')
469 inner_tag = self.tagstack.pop()
473 raise compat_HTMLParseError(f'matching opening tag
for closing {tag} tag
not found
')
474 if not self.tagstack:
475 raise self.HTMLBreakOnClosingTagException()
478 # XXX: This should be far less strict
479 def get_element_text_and_html_by_tag(tag, html):
481 For the first element with the specified tag in the passed HTML document
482 return its' content (text
) and the whole
element (html
)
484 def find_or_raise(haystack, needle, exc):
486 return haystack.index(needle)
489 closing_tag = f'</{tag}>'
490 whole_start = find_or_raise(
491 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
492 content_start = find_or_raise(
493 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
494 content_start += whole_start + 1
495 with HTMLBreakOnClosingTagParser() as parser:
496 parser.feed(html[whole_start:content_start])
497 if not parser.tagstack or parser.tagstack[0] != tag:
498 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
499 offset = content_start
500 while offset < len(html):
501 next_closing_tag_start = find_or_raise(
502 html[offset:], closing_tag,
503 compat_HTMLParseError(f'closing {tag} tag not found'))
504 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
506 parser.feed(html[offset:offset + next_closing_tag_end])
507 offset += next_closing_tag_end
508 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
509 return html[content_start:offset + next_closing_tag_start], \
510 html[whole_start:offset + next_closing_tag_end]
511 raise compat_HTMLParseError('unexpected end of html')
514 class HTMLAttributeParser(html.parser.HTMLParser):
515 """Trivial HTML parser to gather the attributes
for a single element
"""
519 html.parser.HTMLParser.__init__(self)
521 def handle_starttag(self, tag, attrs):
522 self.attrs = dict(attrs)
523 raise compat_HTMLParseError('done')
526 class HTMLListAttrsParser(html.parser.HTMLParser):
527 """HTML parser to gather the attributes
for the elements of a
list"""
530 html.parser.HTMLParser.__init__(self)
534 def handle_starttag(self, tag, attrs):
535 if tag == 'li' and self._level == 0:
536 self.items.append(dict(attrs))
539 def handle_endtag(self, tag):
543 def extract_attributes(html_element):
544 """Given a string
for an HTML element such
as
546 a
="foo" B
="bar" c
="&98;az" d
=boz
547 empty
= noval entity
="&"
550 Decode
and return a dictionary of attributes
.
552 'a': 'foo', 'b': 'bar', c
: 'baz', d
: 'boz',
553 'empty': '', 'noval': None, 'entity': '&',
554 'sq': '"', 'dq': '\''
557 parser = HTMLAttributeParser()
558 with contextlib.suppress(compat_HTMLParseError):
559 parser.feed(html_element)
564 def parse_list(webpage):
565 """Given a string
for an series of HTML
<li
> elements
,
566 return a dictionary of their attributes
"""
567 parser = HTMLListAttrsParser()
573 def clean_html(html):
574 """Clean an HTML snippet into a readable string
"""
576 if html is None: # Convenience for sanitizing descriptions etc.
579 html = re.sub(r'\s+', ' ', html)
580 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
581 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
583 html = re.sub('<.*?>', '', html)
584 # Replace html entities
585 html = unescapeHTML(html)
589 class LenientJSONDecoder(json.JSONDecoder):
591 def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
592 self.transform_source, self.ignore_extra = transform_source, ignore_extra
593 self._close_attempts = 2 * close_objects
594 super().__init__(*args, **kwargs)
597 def _close_object(err):
598 doc = err.doc[:err.pos]
599 # We need to add comma first to get the correct error message
600 if err.msg.startswith('Expecting \',\''):
602 elif not doc.endswith(','):
605 if err.msg.startswith('Expecting property name'):
606 return doc[:-1] + '}'
607 elif err.msg.startswith('Expecting value'):
608 return doc[:-1] + ']'
611 if self.transform_source:
612 s = self.transform_source(s)
613 for attempt in range(self._close_attempts + 1):
615 if self.ignore_extra:
616 return self.raw_decode(s.lstrip())[0]
617 return super().decode(s)
618 except json.JSONDecodeError as e:
621 elif attempt < self._close_attempts:
622 s = self._close_object(e)
625 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
626 assert False, 'Too many attempts to decode JSON'
629 def sanitize_open(filename, open_mode):
630 """Try to
open the given filename
, and slightly tweak it
if this fails
.
632 Attempts to
open the given filename
. If this fails
, it tries to change
633 the filename slightly
, step by step
, until it
's either able to open it
634 or it fails and raises a final exception, like the standard open()
637 It returns the tuple (stream, definitive_file_name).
640 if sys.platform == 'win32
':
643 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
644 with contextlib.suppress(io.UnsupportedOperation):
645 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
646 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
648 for attempt in range(2):
651 if sys.platform == 'win32
':
652 # FIXME: An exclusive lock also locks the file from being read.
653 # Since windows locks are mandatory, don't lock the
file on
windows (for now
).
654 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
655 raise LockingUnsupportedError()
656 stream
= locked_file(filename
, open_mode
, block
=False).__enter
__()
658 stream
= open(filename
, open_mode
)
659 return stream
, filename
660 except OSError as err
:
661 if attempt
or err
.errno
in (errno
.EACCES
,):
663 old_filename
, filename
= filename
, sanitize_path(filename
)
664 if old_filename
== filename
:
668 def timeconvert(timestr
):
669 """Convert RFC 2822 defined time string into system timestamp"""
671 timetuple
= email
.utils
.parsedate_tz(timestr
)
672 if timetuple
is not None:
673 timestamp
= email
.utils
.mktime_tz(timetuple
)
677 def sanitize_filename(s
, restricted
=False, is_id
=NO_DEFAULT
):
678 """Sanitizes a string so it could be used as part of a filename.
679 @param restricted Use a stricter subset of allowed characters
680 @param is_id Whether this is an ID that should be kept unchanged if possible.
681 If unset, yt-dlp's new sanitization rules are in effect
686 def replace_insane(char
):
687 if restricted
and char
in ACCENT_CHARS
:
688 return ACCENT_CHARS
[char
]
689 elif not restricted
and char
== '\n':
691 elif is_id
is NO_DEFAULT
and not restricted
and char
in '"*:<>?|/\\':
692 # Replace with their full-width unicode counterparts
693 return {'/': '\u29F8', '\\': '\u29f9'}
.get(char
, chr(ord(char
) + 0xfee0))
694 elif char
== '?' or ord(char
) < 32 or ord(char
) == 127:
697 return '' if restricted
else '\''
699 return '\0_\0-' if restricted
else '\0 \0-'
700 elif char
in '\\/|*<>':
702 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace() or ord(char
) > 127):
706 # Replace look-alike Unicode glyphs
707 if restricted
and (is_id
is NO_DEFAULT
or not is_id
):
708 s
= unicodedata
.normalize('NFKC', s
)
709 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) # Handle timestamps
710 result
= ''.join(map(replace_insane
, s
))
711 if is_id
is NO_DEFAULT
:
712 result
= re
.sub(r
'(\0.)(?:(?=\1)..)+', r
'\1', result
) # Remove repeated substitute chars
713 STRIP_RE
= r
'(?:\0.|[ _-])*'
714 result
= re
.sub(f
'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result
) # Remove substitute chars from start/end
715 result
= result
.replace('\0', '') or '_'
718 while '__' in result
:
719 result
= result
.replace('__', '_')
720 result
= result
.strip('_')
721 # Common case of "Foreign band name - English song title"
722 if restricted
and result
.startswith('-_'):
724 if result
.startswith('-'):
725 result
= '_' + result
[len('-'):]
726 result
= result
.lstrip('.')
732 def sanitize_path(s
, force
=False):
733 """Sanitizes and normalizes path on Windows"""
734 if sys
.platform
== 'win32':
736 drive_or_unc
, _
= os
.path
.splitdrive(s
)
742 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
746 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
747 for path_part
in norm_path
]
749 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
750 elif force
and s
and s
[0] == os
.path
.sep
:
751 sanitized_path
.insert(0, os
.path
.sep
)
752 return os
.path
.join(*sanitized_path
)
755 def sanitize_url(url
, *, scheme
='http'):
756 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
757 # the number of unwanted failures due to missing protocol
760 elif url
.startswith('//'):
761 return f
'{scheme}:{url}'
762 # Fix some common typos seen so far
764 # https://github.com/ytdl-org/youtube-dl/issues/15649
765 (r
'^httpss://', r
'https://'),
766 # https://bx1.be/lives/direct-tv/
767 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
769 for mistake
, fixup
in COMMON_TYPOS
:
770 if re
.match(mistake
, url
):
771 return re
.sub(mistake
, fixup
, url
)
def extract_basic_auth(url):
    """Split HTTP basic-auth credentials out of a URL.

    @param url  URL that may carry "user:password@" in its network location
    @returns    Tuple (url, auth_header). When the URL has no username it is
                returned unchanged together with None; otherwise the URL is
                returned without the credentials, together with the matching
                "Basic <base64>" value for an Authorization header
    """
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None

    host = parts.hostname or ''
    if ':' in host:
        # urlsplit() strips the square brackets of an IPv6 literal; without
        # them the rebuilt netloc (and any port) would be ambiguous/invalid
        host = f'[{host}]'
    netloc = host if parts.port is None else '%s:%d' % (host, parts.port)
    url = urllib.parse.urlunsplit(parts._replace(netloc=netloc))

    # A missing password is encoded as an empty one ("user:")
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib.request.Request from a sanitized, escaped URL.

    Credentials embedded in the URL are removed from it and sent as an
    "Authorization" header instead. Remaining arguments are passed on to
    urllib.request.Request unchanged.
    """
    cleaned = escape_url(sanitize_url(url))
    url, auth_header = extract_basic_auth(cleaned)
    if auth_header is not None:
        if len(args) >= 2:
            # headers were passed positionally (second argument after url)
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return urllib.request.Request(url, *args, **kwargs)
796 """Expand shell variables and ~"""
797 return os
.path
.expandvars(compat_expanduser(s
))
800 def orderedSet(iterable
, *, lazy
=False):
801 """Remove all duplicates from the input iterable"""
803 seen
= [] # Do not use set since the items can be unhashable
809 return _iter() if lazy
else list(_iter())
812 def _htmlentity_transform(entity_with_semicolon
):
813 """Transforms an HTML entity to a character."""
814 entity
= entity_with_semicolon
[:-1]
816 # Known non-numeric HTML entity
817 if entity
in html
.entities
.name2codepoint
:
818 return chr(html
.entities
.name2codepoint
[entity
])
820 # TODO: HTML5 allows entities without a semicolon.
821 # E.g. 'Éric' should be decoded as 'Éric'.
822 if entity_with_semicolon
in html
.entities
.html5
:
823 return html
.entities
.html5
[entity_with_semicolon
]
825 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
827 numstr
= mobj
.group(1)
828 if numstr
.startswith('x'):
830 numstr
= '0%s' % numstr
833 # See https://github.com/ytdl-org/youtube-dl/issues/7518
834 with contextlib
.suppress(ValueError):
835 return chr(int(numstr
, base
))
837 # Unknown entity in name, return its literal representation
838 return '&%s;' % entity
844 assert isinstance(s
, str)
847 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
850 def escapeHTML(text
):
853 .replace('&', '&')
854 .replace('<', '<')
855 .replace('>', '>')
856 .replace('"', '"')
857 .replace("'", ''')
def process_communicate_or_kill(p, *args, **kwargs):
    """Deprecated alias: warn, then delegate to Popen.communicate_or_kill"""
    notice = (
        f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
        f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
    deprecation_warning(notice)
    # Called through the class so that p does not have to be a Popen instance
    return Popen.communicate_or_kill(p, *args, **kwargs)
867 class Popen(subprocess
.Popen
):
868 if sys
.platform
== 'win32':
869 _startupinfo
= subprocess
.STARTUPINFO()
870 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
875 def _fix_pyinstaller_ld_path(env
):
876 """Restore LD_LIBRARY_PATH when using PyInstaller
877 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
878 https://github.com/yt-dlp/yt-dlp/issues/4573
880 if not hasattr(sys
, '_MEIPASS'):
884 orig
= env
.get(f
'{key}_ORIG')
890 _fix('LD_LIBRARY_PATH') # Linux
891 _fix('DYLD_LIBRARY_PATH') # macOS
893 def __init__(self
, *args
, env
=None, text
=False, **kwargs
):
895 env
= os
.environ
.copy()
896 self
._fix
_pyinstaller
_ld
_path
(env
)
898 self
.__text
_mode
= kwargs
.get('encoding') or kwargs
.get('errors') or text
or kwargs
.get('universal_newlines')
900 kwargs
['universal_newlines'] = True # For 3.6 compatibility
901 kwargs
.setdefault('encoding', 'utf-8')
902 kwargs
.setdefault('errors', 'replace')
903 super().__init
__(*args
, env
=env
, **kwargs
, startupinfo
=self
._startupinfo
)
905 def communicate_or_kill(self
, *args
, **kwargs
):
907 return self
.communicate(*args
, **kwargs
)
908 except BaseException
: # Including KeyboardInterrupt
909 self
.kill(timeout
=None)
912 def kill(self
, *, timeout
=0):
915 self
.wait(timeout
=timeout
)
918 def run(cls
, *args
, timeout
=None, **kwargs
):
919 with cls(*args
, **kwargs
) as proc
:
920 default
= '' if proc
.__text
_mode
else b
''
921 stdout
, stderr
= proc
.communicate_or_kill(timeout
=timeout
)
922 return stdout
or default
, stderr
or default
, proc
.returncode
def encodeArgument(s):
    """Return s as str; anything that is not a str is decoded as ASCII"""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')
# Result type of timetuple_from_msec
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Break a duration in milliseconds into (hours, minutes, seconds, milliseconds)"""
    total_seconds, millis = divmod(msec, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return _timetuple(hours=hours, minutes=minutes, seconds=seconds, milliseconds=millis)
942 def formatSeconds(secs
, delim
=':', msec
=False):
943 time
= timetuple_from_msec(secs
* 1000)
945 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
947 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
949 ret
= '%d' % time
.seconds
950 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
953 def _ssl_load_windows_store_certs(ssl_context
, storename
):
954 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
956 certs
= [cert
for cert
, encoding
, trust
in ssl
.enum_certificates(storename
)
957 if encoding
== 'x509_asn' and (
958 trust
is True or ssl
.Purpose
.SERVER_AUTH
.oid
in trust
)]
959 except PermissionError
:
962 with contextlib
.suppress(ssl
.SSLError
):
963 ssl_context
.load_verify_locations(cadata
=cert
)
966 def make_HTTPS_handler(params
, **kwargs
):
967 opts_check_certificate
= not params
.get('nocheckcertificate')
968 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLS_CLIENT
)
969 context
.check_hostname
= opts_check_certificate
970 if params
.get('legacyserverconnect'):
971 context
.options |
= 4 # SSL_OP_LEGACY_SERVER_CONNECT
972 # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
973 context
.set_ciphers('DEFAULT')
975 sys
.version_info
< (3, 10)
976 and ssl
.OPENSSL_VERSION_INFO
>= (1, 1, 1)
977 and not ssl
.OPENSSL_VERSION
.startswith('LibreSSL')
979 # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
980 # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
981 # in some situations [2][3].
982 # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
983 # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
984 # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
985 # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
986 # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
987 # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
988 # 4. https://peps.python.org/pep-0644/
989 # 5. https://peps.python.org/pep-0644/#libressl-support
990 # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
991 context
.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
992 context
.minimum_version
= ssl
.TLSVersion
.TLSv1_2
994 context
.verify_mode
= ssl
.CERT_REQUIRED
if opts_check_certificate
else ssl
.CERT_NONE
995 if opts_check_certificate
:
996 if certifi
and 'no-certifi' not in params
.get('compat_opts', []):
997 context
.load_verify_locations(cafile
=certifi
.where())
1000 context
.load_default_certs()
1001 # Work around the issue in load_default_certs when there are bad certificates. See:
1002 # https://github.com/yt-dlp/yt-dlp/issues/1060,
1003 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
1004 except ssl
.SSLError
:
1005 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
1006 if sys
.platform
== 'win32' and hasattr(ssl
, 'enum_certificates'):
1007 for storename
in ('CA', 'ROOT'):
1008 _ssl_load_windows_store_certs(context
, storename
)
1009 context
.set_default_verify_paths()
1011 client_certfile
= params
.get('client_certificate')
1014 context
.load_cert_chain(
1015 client_certfile
, keyfile
=params
.get('client_certificate_key'),
1016 password
=params
.get('client_certificate_password'))
1017 except ssl
.SSLError
:
1018 raise YoutubeDLError('Unable to load client certificate')
1020 # Some servers may reject requests if ALPN extension is not sent. See:
1021 # https://github.com/python/cpython/issues/85140
1022 # https://github.com/yt-dlp/yt-dlp/issues/3878
1023 with contextlib
.suppress(NotImplementedError):
1024 context
.set_alpn_protocols(['http/1.1'])
1026 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
def bug_reports_message(before=';'):
    """Return the "please report this issue" notice, appended to the text in before.

    The notice is capitalized when before is empty or ends a sentence
    ('.', '!' or '?'), and is joined to a non-empty before with one space.
    """
    from ..update import REPOSITORY

    notice = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
              'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    prefix = before.rstrip()
    starts_sentence = not prefix or prefix.endswith(('.', '!', '?'))
    if starts_sentence:
        notice = notice[0].title() + notice[1:]

    if prefix:
        return prefix + ' ' + notice
    return notice
1042 class YoutubeDLError(Exception):
1043 """Base exception for YoutubeDL errors."""
1046 def __init__(self
, msg
=None):
1049 elif self
.msg
is None:
1050 self
.msg
= type(self
).__name
__
1051 super().__init
__(self
.msg
)
# Tuple of exception types that are treated as network errors.
# ssl.CertificateError is only included when the ssl module provides it.
network_exceptions = (
    urllib.error.URLError,
    http.client.HTTPException,
    socket.error,
    *((ssl.CertificateError,) if hasattr(ssl, 'CertificateError') else ()),
)
1060 class ExtractorError(YoutubeDLError
):
1061 """Error during info extraction."""
1063 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
1064 """ tb, if given, is the original traceback (so that it can be printed out).
1065 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1067 if sys
.exc_info()[0] in network_exceptions
:
1070 self
.orig_msg
= str(msg
)
1072 self
.expected
= expected
1074 self
.video_id
= video_id
1076 self
.exc_info
= sys
.exc_info() # preserve original exception
1077 if isinstance(self
.exc_info
[1], ExtractorError
):
1078 self
.exc_info
= self
.exc_info
[1].exc_info
1079 super().__init
__(self
.__msg
)
1084 format_field(self
.ie
, None, '[%s] '),
1085 format_field(self
.video_id
, None, '%s: '),
1087 format_field(self
.cause
, None, ' (caused by %r)'),
1088 '' if self
.expected
else bug_reports_message()))
1090 def format_traceback(self
):
1091 return join_nonempty(
1092 self
.traceback
and ''.join(traceback
.format_tb(self
.traceback
)),
1093 self
.cause
and ''.join(traceback
.format_exception(None, self
.cause
, self
.cause
.__traceback
__)[1:]),
1096 def __setattr__(self
, name
, value
):
1097 super().__setattr
__(name
, value
)
1098 if getattr(self
, 'msg', None) and name
not in ('msg', 'args'):
1099 self
.msg
= self
.__msg
or type(self
).__name
__
1100 self
.args
= (self
.msg
, ) # Cannot be property
1103 class UnsupportedError(ExtractorError
):
1104 def __init__(self
, url
):
1106 'Unsupported URL: %s' % url
, expected
=True)
1110 class RegexNotFoundError(ExtractorError
):
1111 """Error when a regex didn't match"""
1115 class GeoRestrictedError(ExtractorError
):
1116 """Geographic restriction Error exception.
1118 This exception may be thrown when a video is not available from your
1119 geographic location due to geographic restrictions imposed by a website.
1122 def __init__(self
, msg
, countries
=None, **kwargs
):
1123 kwargs
['expected'] = True
1124 super().__init
__(msg
, **kwargs
)
1125 self
.countries
= countries
class UserNotLive(ExtractorError):
    """Raised when the requested channel/user is not live at the moment"""

    def __init__(self, msg=None, **kwargs):
        # This error is always flagged as expected, whatever the caller passed
        kwargs.update(expected=True)
        if not msg:
            msg = 'The channel is not currently live'
        super().__init__(msg, **kwargs)
1136 class DownloadError(YoutubeDLError
):
1137 """Download Error exception.
1139 This exception may be thrown by FileDownloader objects if they are not
1140 configured to continue on errors. They will contain the appropriate
1144 def __init__(self
, msg
, exc_info
=None):
1145 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1146 super().__init
__(msg
)
1147 self
.exc_info
= exc_info
1150 class EntryNotInPlaylist(YoutubeDLError
):
1151 """Entry not in playlist exception.
1153 This exception will be thrown by YoutubeDL when a requested entry
1154 is not found in the playlist info_dict
1156 msg
= 'Entry not found in info'
1159 class SameFileError(YoutubeDLError
):
1160 """Same File exception.
1162 This exception will be thrown by FileDownloader objects if they detect
1163 multiple files would have to be downloaded to the same file on disk.
1165 msg
= 'Fixed output name but more than one file to download'
1167 def __init__(self
, filename
=None):
1168 if filename
is not None:
1169 self
.msg
+= f
': {filename}'
1170 super().__init
__(self
.msg
)
1173 class PostProcessingError(YoutubeDLError
):
1174 """Post Processing exception.
1176 This exception may be raised by PostProcessor's .run() method to
1177 indicate an error in the postprocessing task.
class DownloadCancelled(YoutubeDLError):
    """Exception raised when the download queue should be interrupted.

    Base class of the more specific "stop the queue" exceptions in this module.
    """
    # Class-level default message
    # NOTE(review): presumably used by YoutubeDLError.__init__ when no msg is passed - confirm
    msg = 'The download was cancelled'
class ExistingVideoReached(DownloadCancelled):
    """Download cancellation triggered by --break-on-existing

    Signals that a video already present in the archive was encountered.
    """
    # Class-level default message for this cancellation reason
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """Download cancellation triggered by --break-match-filter

    Signals that a video which did not match the filter was encountered.
    """
    # Class-level default message for this cancellation reason
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
class MaxDownloadsReached(DownloadCancelled):
    """Download cancellation triggered when the --max-downloads limit has been reached."""
    # Class-level default message for this cancellation reason
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ReExtractInfo(YoutubeDLError):
    """Signals that the video info needs to be extracted again."""

    def __init__(self, msg, expected=False):
        # Stored on the instance as given; defaults to False
        self.expected = expected
        super().__init__(msg)
1209 class ThrottledDownload(ReExtractInfo
):
1210 """ Download speed below --throttled-rate. """
1211 msg
= 'The download speed is below throttle limit'
1214 super().__init
__(self
.msg
, expected
=False)
1217 class UnavailableVideoError(YoutubeDLError
):
1218 """Unavailable Format exception.
1220 This exception will be thrown when a video is requested
1221 in a format that is not available for that video.
1223 msg
= 'Unable to download video'
1225 def __init__(self
, err
=None):
1227 self
.msg
+= f
': {err}'
1228 super().__init
__(self
.msg
)
1231 class ContentTooShortError(YoutubeDLError
):
1232 """Content Too Short exception.
1234 This exception may be raised by FileDownloader objects when a file they
1235 download is too small for what the server announced first, indicating
1236 the connection was probably interrupted.
1239 def __init__(self
, downloaded
, expected
):
1240 super().__init
__(f
'Downloaded {downloaded} bytes, expected {expected} bytes')
1242 self
.downloaded
= downloaded
1243 self
.expected
= expected
1246 class XAttrMetadataError(YoutubeDLError
):
1247 def __init__(self
, code
=None, msg
='Unknown error'):
1248 super().__init
__(msg
)
1252 # Parsing code and msg
1253 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
1254 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
1255 self
.reason
= 'NO_SPACE'
1256 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
1257 self
.reason
= 'VALUE_TOO_LONG'
1259 self
.reason
= 'NOT_SUPPORTED'
1262 class XAttrUnavailableError(YoutubeDLError
):
1266 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
1267 hc
= http_class(*args
, **kwargs
)
1268 source_address
= ydl_handler
._params
.get('source_address')
1270 if source_address
is not None:
1271 # This is to workaround _create_connection() from socket where it will try all
1272 # address data from getaddrinfo() including IPv6. This filters the result from
1273 # getaddrinfo() based on the source_address value.
1274 # This is based on the cpython socket.create_connection() function.
1275 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1276 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
1277 host
, port
= address
1279 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
1280 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
1281 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
1282 if addrs
and not ip_addrs
:
1283 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
1285 "No remote IP%s addresses available for connect, can't use '%s' as source address"
1286 % (ip_version
, source_address
[0]))
1287 for res
in ip_addrs
:
1288 af
, socktype
, proto
, canonname
, sa
= res
1291 sock
= socket
.socket(af
, socktype
, proto
)
1292 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
1293 sock
.settimeout(timeout
)
1294 sock
.bind(source_address
)
1296 err
= None # Explicitly break reference cycle
1298 except OSError as _
:
1300 if sock
is not None:
1305 raise OSError('getaddrinfo returns an empty list')
1306 if hasattr(hc
, '_create_connection'):
1307 hc
._create
_connection
= _create_connection
1308 hc
.source_address
= (source_address
, 0)
1313 class YoutubeDLHandler(urllib
.request
.HTTPHandler
):
1314 """Handler for HTTP requests and responses.
1316 This class, when installed with an OpenerDirector, automatically adds
1317 the standard headers to every HTTP request and handles gzipped, deflated and
1318 brotli responses from web servers.
1320 Part of this code was copied from:
1322 http://techknack.net/python-urllib2-handlers/
1324 Andrew Rowls, the author of that code, agreed to release it to the
1328 def __init__(self
, params
, *args
, **kwargs
):
1329 urllib
.request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
1330 self
._params
= params
1332 def http_open(self
, req
):
1333 conn_class
= http
.client
.HTTPConnection
1335 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
1337 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
1338 del req
.headers
['Ytdl-socks-proxy']
1340 return self
.do_open(functools
.partial(
1341 _create_http_connection
, self
, conn_class
, False),
1349 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
1351 return zlib
.decompress(data
)
1357 return brotli
.decompress(data
)
1359 def http_request(self
, req
):
1360 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1361 # always respected by websites, some tend to give out URLs with non percent-encoded
1362 # non-ASCII characters (see telemb.py, ard.py [#3412])
1363 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1364 # To work around aforementioned issue we will replace request's original URL with
1365 # percent-encoded one
1366 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1367 # the code of this workaround has been moved here from YoutubeDL.urlopen()
1368 url
= req
.get_full_url()
1369 url_escaped
= escape_url(url
)
1371 # Substitute URL if any change after escaping
1372 if url
!= url_escaped
:
1373 req
= update_Request(req
, url
=url_escaped
)
1375 for h
, v
in self
._params
.get('http_headers', std_headers
).items():
1376 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
1377 # The dict keys are capitalized because of this bug by urllib
1378 if h
.capitalize() not in req
.headers
:
1379 req
.add_header(h
, v
)
1381 if 'Youtubedl-no-compression' in req
.headers
: # deprecated
1382 req
.headers
.pop('Youtubedl-no-compression', None)
1383 req
.add_header('Accept-encoding', 'identity')
1385 if 'Accept-encoding' not in req
.headers
:
1386 req
.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS
))
1388 return super().do_request_(req
)
1390 def http_response(self
, req
, resp
):
1393 if resp
.headers
.get('Content-encoding', '') == 'gzip':
1394 content
= resp
.read()
1395 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
1397 uncompressed
= io
.BytesIO(gz
.read())
1398 except OSError as original_ioerror
:
1399 # There may be junk add the end of the file
1400 # See http://stackoverflow.com/q/4928560/35070 for details
1401 for i
in range(1, 1024):
1403 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
1404 uncompressed
= io
.BytesIO(gz
.read())
1409 raise original_ioerror
1410 resp
= urllib
.request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
1411 resp
.msg
= old_resp
.msg
1413 if resp
.headers
.get('Content-encoding', '') == 'deflate':
1414 gz
= io
.BytesIO(self
.deflate(resp
.read()))
1415 resp
= urllib
.request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
1416 resp
.msg
= old_resp
.msg
1418 if resp
.headers
.get('Content-encoding', '') == 'br':
1419 resp
= urllib
.request
.addinfourl(
1420 io
.BytesIO(self
.brotli(resp
.read())), old_resp
.headers
, old_resp
.url
, old_resp
.code
)
1421 resp
.msg
= old_resp
.msg
1422 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1423 # https://github.com/ytdl-org/youtube-dl/issues/6457).
1424 if 300 <= resp
.code
< 400:
1425 location
= resp
.headers
.get('Location')
1427 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
1428 location
= location
.encode('iso-8859-1').decode()
1429 location_escaped
= escape_url(location
)
1430 if location
!= location_escaped
:
1431 del resp
.headers
['Location']
1432 resp
.headers
['Location'] = location_escaped
1435 https_request
= http_request
1436 https_response
= http_response
1439 def make_socks_conn_class(base_class
, socks_proxy
):
1440 assert issubclass(base_class
, (
1441 http
.client
.HTTPConnection
, http
.client
.HTTPSConnection
))
1443 url_components
= urllib
.parse
.urlparse(socks_proxy
)
1444 if url_components
.scheme
.lower() == 'socks5':
1445 socks_type
= ProxyType
.SOCKS5
1446 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
1447 socks_type
= ProxyType
.SOCKS4
1448 elif url_components
.scheme
.lower() == 'socks4a':
1449 socks_type
= ProxyType
.SOCKS4A
1451 def unquote_if_non_empty(s
):
1454 return urllib
.parse
.unquote_plus(s
)
1458 url_components
.hostname
, url_components
.port
or 1080,
1460 unquote_if_non_empty(url_components
.username
),
1461 unquote_if_non_empty(url_components
.password
),
1464 class SocksConnection(base_class
):
1466 self
.sock
= sockssocket()
1467 self
.sock
.setproxy(*proxy_args
)
1468 if isinstance(self
.timeout
, (int, float)):
1469 self
.sock
.settimeout(self
.timeout
)
1470 self
.sock
.connect((self
.host
, self
.port
))
1472 if isinstance(self
, http
.client
.HTTPSConnection
):
1473 if hasattr(self
, '_context'): # Python > 2.6
1474 self
.sock
= self
._context
.wrap_socket(
1475 self
.sock
, server_hostname
=self
.host
)
1477 self
.sock
= ssl
.wrap_socket(self
.sock
)
1479 return SocksConnection
1482 class YoutubeDLHTTPSHandler(urllib
.request
.HTTPSHandler
):
1483 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
1484 urllib
.request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
1485 self
._https
_conn
_class
= https_conn_class
or http
.client
.HTTPSConnection
1486 self
._params
= params
1488 def https_open(self
, req
):
1490 conn_class
= self
._https
_conn
_class
1492 if hasattr(self
, '_context'): # python > 2.6
1493 kwargs
['context'] = self
._context
1494 if hasattr(self
, '_check_hostname'): # python 3.x
1495 kwargs
['check_hostname'] = self
._check
_hostname
1497 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
1499 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
1500 del req
.headers
['Ytdl-socks-proxy']
1503 return self
.do_open(
1504 functools
.partial(_create_http_connection
, self
, conn_class
, True), req
, **kwargs
)
1505 except urllib
.error
.URLError
as e
:
1506 if (isinstance(e
.reason
, ssl
.SSLError
)
1507 and getattr(e
.reason
, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1508 raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
def is_path_like(f):
    """Tell whether `f` is a str, bytes or os.PathLike object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1516 class YoutubeDLCookieJar(http
.cookiejar
.MozillaCookieJar
):
1518 See [1] for cookie file format.
1520 1. https://curl.haxx.se/docs/http-cookies.html
1522 _HTTPONLY_PREFIX
= '#HttpOnly_'
1524 _HEADER
= '''# Netscape HTTP Cookie File
1525 # This file is generated by yt-dlp. Do not edit.
1528 _CookieFileEntry
= collections
.namedtuple(
1530 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1532 def __init__(self
, filename
=None, *args
, **kwargs
):
1533 super().__init
__(None, *args
, **kwargs
)
1534 if is_path_like(filename
):
1535 filename
= os
.fspath(filename
)
1536 self
.filename
= filename
1539 def _true_or_false(cndn
):
1540 return 'TRUE' if cndn
else 'FALSE'
1542 @contextlib.contextmanager
1543 def open(self
, file, *, write
=False):
1544 if is_path_like(file):
1545 with open(file, 'w' if write
else 'r', encoding
='utf-8') as f
:
1552 def _really_save(self
, f
, ignore_discard
=False, ignore_expires
=False):
1555 if (not ignore_discard
and cookie
.discard
1556 or not ignore_expires
and cookie
.is_expired(now
)):
1558 name
, value
= cookie
.name
, cookie
.value
1560 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1561 # with no name, whereas http.cookiejar regards it as a
1562 # cookie with no value.
1563 name
, value
= '', name
1564 f
.write('%s\n' % '\t'.join((
1566 self
._true
_or
_false
(cookie
.domain
.startswith('.')),
1568 self
._true
_or
_false
(cookie
.secure
),
1569 str_or_none(cookie
.expires
, default
=''),
1573 def save(self
, filename
=None, *args
, **kwargs
):
1575 Save cookies to a file.
1576 Code is taken from CPython 3.6
1577 https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1579 if filename
is None:
1580 if self
.filename
is not None:
1581 filename
= self
.filename
1583 raise ValueError(http
.cookiejar
.MISSING_FILENAME_TEXT
)
1585 # Store session cookies with `expires` set to 0 instead of an empty string
1587 if cookie
.expires
is None:
1590 with self
.open(filename
, write
=True) as f
:
1591 f
.write(self
._HEADER
)
1592 self
._really
_save
(f
, *args
, **kwargs
)
1594 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
1595 """Load cookies from a file."""
1596 if filename
is None:
1597 if self
.filename
is not None:
1598 filename
= self
.filename
1600 raise ValueError(http
.cookiejar
.MISSING_FILENAME_TEXT
)
1602 def prepare_line(line
):
1603 if line
.startswith(self
._HTTPONLY
_PREFIX
):
1604 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
1605 # comments and empty lines are fine
1606 if line
.startswith('#') or not line
.strip():
1608 cookie_list
= line
.split('\t')
1609 if len(cookie_list
) != self
._ENTRY
_LEN
:
1610 raise http
.cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
1611 cookie
= self
._CookieFileEntry
(*cookie_list
)
1612 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
1613 raise http
.cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
1617 with self
.open(filename
) as f
:
1620 cf
.write(prepare_line(line
))
1621 except http
.cookiejar
.LoadError
as e
:
1622 if f
'{line.strip()} '[0] in '[{"':
1623 raise http
.cookiejar
.LoadError(
1624 'Cookies file must be Netscape formatted, not JSON. See '
1625 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1626 write_string(f
'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1629 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
1630 # Session cookies are denoted by either `expires` field set to
1631 # an empty string or 0. MozillaCookieJar only recognizes the former
1632 # (see [1]). So we need force the latter to be recognized as session
1633 # cookies on our own.
1634 # Session cookies may be important for cookies-based authentication,
1635 # e.g. usually, when user does not check 'Remember me' check box while
1636 # logging in on a site, some important cookies are stored as session
1637 # cookies so that not recognizing them will result in failed login.
1638 # 1. https://bugs.python.org/issue17164
1640 # Treat `expires=0` cookies as session cookies
1641 if cookie
.expires
== 0:
1642 cookie
.expires
= None
1643 cookie
.discard
= True
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    """HTTPCookieProcessor whose https hooks are bound to this class's handlers"""

    def __init__(self, cookiejar=None):
        super().__init__(cookiejar)

    def http_response(self, request, response):
        # Plain delegation to the stock implementation
        return super().http_response(request, response)

    # https traffic goes through the same hooks as http
    https_response = http_response
    https_request = urllib.request.HTTPCookieProcessor.http_request
1657 class YoutubeDLRedirectHandler(urllib
.request
.HTTPRedirectHandler
):
1658 """YoutubeDL redirect handler
1660 The code is based on HTTPRedirectHandler implementation from CPython [1].
1662 This redirect handler solves two issues:
1663 - ensures redirect URL is always unicode under python 2
1664 - introduces support for experimental HTTP response status code
1665 308 Permanent Redirect [2] used by some sites [3]
1667 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1668 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1669 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1672 http_error_301
= http_error_303
= http_error_307
= http_error_308
= urllib
.request
.HTTPRedirectHandler
.http_error_302
1674 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
1675 """Return a Request or None in response to a redirect.
1677 This is called by the http_error_30x methods when a
1678 redirection response is received. If a redirection should
1679 take place, return a new Request to allow http_error_30x to
1680 perform the redirect. Otherwise, raise HTTPError if no-one
1681 else should try to handle this url. Return None if you can't
1682 but another Handler might.
1684 m
= req
.get_method()
1685 if (not (code
in (301, 302, 303, 307, 308) and m
in ("GET", "HEAD")
1686 or code
in (301, 302, 303) and m
== "POST")):
1687 raise urllib
.error
.HTTPError(req
.full_url
, code
, msg
, headers
, fp
)
1688 # Strictly (according to RFC 2616), 301 or 302 in response to
1689 # a POST MUST NOT cause a redirection without confirmation
1690 # from the user (of urllib.request, in this case). In practice,
1691 # essentially all clients do redirect in this case, so we do
1694 # Be conciliant with URIs containing a space. This is mainly
1695 # redundant with the more complete encoding done in http_error_302(),
1696 # but it is kept for compatibility with other callers.
1697 newurl
= newurl
.replace(' ', '%20')
1699 CONTENT_HEADERS
= ("content-length", "content-type")
1700 # NB: don't use dict comprehension for python 2.6 compatibility
1701 newheaders
= {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1703 # A 303 must either use GET or HEAD for subsequent request
1704 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1705 if code
== 303 and m
!= 'HEAD':
1707 # 301 and 302 redirects are commonly turned into a GET from a POST
1708 # for subsequent requests by browsers, so we'll do the same.
1709 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1710 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1711 if code
in (301, 302) and m
== 'POST':
1714 return urllib
.request
.Request(
1715 newurl
, headers
=newheaders
, origin_req_host
=req
.origin_req_host
,
1716 unverifiable
=True, method
=m
)
1719 def extract_timezone(date_str
):
1722 ^.{8,}? # >=8 char non-TZ prefix, if present
1723 (?P<tz>Z| # just the UTC Z, or
1724 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1725 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1726 [ ]? # optional space
1727 (?P<sign>\+|-) # +/-
1728 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1732 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1733 timezone
= TIMEZONE_NAMES
.get(m
and m
.group('tz').strip())
1734 if timezone
is not None:
1735 date_str
= date_str
[:-len(m
.group('tz'))]
1736 timezone
= datetime
.timedelta(hours
=timezone
or 0)
1738 date_str
= date_str
[:-len(m
.group('tz'))]
1739 if not m
.group('sign'):
1740 timezone
= datetime
.timedelta()
1742 sign
= 1 if m
.group('sign') == '+' else -1
1743 timezone
= datetime
.timedelta(
1744 hours
=sign
* int(m
.group('hours')),
1745 minutes
=sign
* int(m
.group('minutes')))
1746 return timezone
, date_str
1749 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
1750 """ Return a UNIX timestamp from the given date """
1752 if date_str
is None:
1755 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
1757 if timezone
is None:
1758 timezone
, date_str
= extract_timezone(date_str
)
1760 with contextlib
.suppress(ValueError):
1761 date_format
= f
'%Y-%m-%d{delimiter}%H:%M:%S'
1762 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
1763 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST if `day_first` is truthy, else DATE_FORMATS_MONTH_FIRST"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1770 def unified_strdate(date_str
, day_first
=True):
1771 """Return a string with the date in the format YYYYMMDD"""
1773 if date_str
is None:
1777 date_str
= date_str
.replace(',', ' ')
1778 # Remove AM/PM + timezone
1779 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
1780 _
, date_str
= extract_timezone(date_str
)
1782 for expression
in date_formats(day_first
):
1783 with contextlib
.suppress(ValueError):
1784 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
1785 if upload_date
is None:
1786 timetuple
= email
.utils
.parsedate_tz(date_str
)
1788 with contextlib
.suppress(ValueError):
1789 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
1790 if upload_date
is not None:
1791 return str(upload_date
)
1794 def unified_timestamp(date_str
, day_first
=True):
1795 if date_str
is None:
1798 date_str
= re
.sub(r
'\s+', ' ', re
.sub(
1799 r
'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str
))
1801 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
1802 timezone
, date_str
= extract_timezone(date_str
)
1804 # Remove AM/PM + timezone
1805 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
1807 # Remove unrecognized timezones from ISO 8601 alike timestamps
1808 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1810 date_str
= date_str
[:-len(m
.group('tz'))]
1812 # Python only supports microseconds, so remove nanoseconds
1813 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
1815 date_str
= m
.group(1)
1817 for expression
in date_formats(day_first
):
1818 with contextlib
.suppress(ValueError):
1819 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
1820 return calendar
.timegm(dt
.timetuple())
1822 timetuple
= email
.utils
.parsedate_tz(date_str
)
1824 return calendar
.timegm(timetuple
) + pm_delta
* 3600 - timezone
.total_seconds()
1827 def determine_ext(url
, default_ext
='unknown_video'):
1828 if url
is None or '.' not in url
:
1830 guess
= url
.partition('?')[0].rpartition('.')[2]
1831 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
1833 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1834 elif guess
.rstrip('/') in KNOWN_EXTENSIONS
:
1835 return guess
.rstrip('/')
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Derive a subtitle filename by swapping the extension of `filename` for '<sub_lang>.<sub_format>'

    `expected_real_ext` is passed through to replace_extension unchanged.
    """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
1844 def datetime_from_str(date_str
, precision
='auto', format
='%Y%m%d'):
1846 Return a datetime object from a string.
1848 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1850 @param format strftime format of DATE
1851 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1852 auto: round to the unit provided in date_str (if applicable).
1854 auto_precision
= False
1855 if precision
== 'auto':
1856 auto_precision
= True
1857 precision
= 'microsecond'
1858 today
= datetime_round(datetime
.datetime
.utcnow(), precision
)
1859 if date_str
in ('now', 'today'):
1861 if date_str
== 'yesterday':
1862 return today
- datetime
.timedelta(days
=1)
1864 r
'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1866 if match
is not None:
1867 start_time
= datetime_from_str(match
.group('start'), precision
, format
)
1868 time
= int(match
.group('time')) * (-1 if match
.group('sign') == '-' else 1)
1869 unit
= match
.group('unit')
1870 if unit
== 'month' or unit
== 'year':
1871 new_date
= datetime_add_months(start_time
, time
* 12 if unit
== 'year' else time
)
1877 delta
= datetime
.timedelta(**{unit + 's': time}
)
1878 new_date
= start_time
+ delta
1880 return datetime_round(new_date
, unit
)
1883 return datetime_round(datetime
.datetime
.strptime(date_str
, format
), precision
)
1886 def date_from_str(date_str
, format
='%Y%m%d', strict
=False):
1888 Return a date object from a string using datetime_from_str
1890 @param strict Restrict allowed patterns to "YYYYMMDD" and
1891 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1893 if strict
and not re
.fullmatch(r
'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str
):
1894 raise ValueError(f
'Invalid date format "{date_str}"')
1895 return datetime_from_str(date_str
, precision
='microsecond', format
=format
).date()
def datetime_add_months(dt, months):
    """Shift a datetime object by a (possibly negative) number of months.

    The day is clamped to the last day of the resulting month,
    e.g. Jan 31 + 1 month gives the last day of February.
    """
    # A zero-based month index lets divmod produce the year carry directly
    year_shift, month_index = divmod(dt.month - 1 + months, 12)
    target_year = dt.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    @param precision  microsecond|second|minute|hour|day
                      'microsecond' returns `dt` unchanged
    @raises KeyError  if `precision` is not one of the supported units
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    # Round half up to the nearest multiple of the unit
    roundto = lambda x, n: ((x + n / 2) // n) * n
    timestamp = calendar.timegm(dt.timetuple())
    rounded = roundto(timestamp, unit_seconds[precision])
    # Same naive-UTC result as the deprecated utcfromtimestamp()
    return datetime.datetime.fromtimestamp(rounded, datetime.timezone.utc).replace(tzinfo=None)
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    return date_str
class DateRange:
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest/latest representable date.
        @raises ValueError  if start is later than end
        """
        if start is not None:
            self.start = date_from_str(start, strict=True)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end, strict=True)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date but cannot be compared with one
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    def __eq__(self, other):
        return (isinstance(other, DateRange)
                and self.start == other.start and self.end == other.end)
1971 def system_identifier():
1972 python_implementation
= platform
.python_implementation()
1973 if python_implementation
== 'PyPy' and hasattr(sys
, 'pypy_version_info'):
1974 python_implementation
+= ' version %d.%d.%d' % sys
.pypy_version_info
[:3]
1976 with contextlib
.suppress(OSError): # We may not have access to the executable
1977 libc_ver
= platform
.libc_ver()
1979 return 'Python %s (%s %s %s) - %s (%s%s)' % (
1980 platform
.python_version(),
1981 python_implementation
,
1983 platform
.architecture()[0],
1984 platform
.platform(),
1985 ssl
.OPENSSL_VERSION
,
1986 format_field(join_nonempty(*libc_ver
, delim
=' '), None, ', %s'),
1991 def get_windows_version():
1992 ''' Get Windows version. returns () if it's not running on Windows '''
1993 if compat_os_name
== 'nt':
1994 return version_tuple(platform
.win32_ver()[1])
1999 def write_string(s
, out
=None, encoding
=None):
2000 assert isinstance(s
, str)
2001 out
= out
or sys
.stderr
2002 # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
2006 if compat_os_name
== 'nt' and supports_terminal_sequences(out
):
2007 s
= re
.sub(r
'([\r\n]+)', r
' \1', s
)
2009 enc
, buffer = None, out
2010 if 'b' in getattr(out
, 'mode', ''):
2011 enc
= encoding
or preferredencoding()
2012 elif hasattr(out
, 'buffer'):
2014 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
2016 buffer.write(s
.encode(enc
, 'ignore') if enc
else s
)
2020 def deprecation_warning(msg
, *, printer
=None, stacklevel
=0, **kwargs
):
2021 from .. import _IN_CLI
2023 if msg
in deprecation_warning
._cache
:
2025 deprecation_warning
._cache
.add(msg
)
2027 return printer(f
'{msg}{bug_reports_message()}', **kwargs
)
2028 return write_string(f
'ERROR: {msg}{bug_reports_message()}\n', **kwargs
)
2031 warnings
.warn(DeprecationWarning(msg
), stacklevel
=stacklevel
+ 3)
2034 deprecation_warning
._cache
= set()
def bytes_to_intlist(bs):
    """Convert a bytes-like object (or a str of single characters) to a list of ints"""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a sequence of ints (each 0-255) into a bytes object"""
    if not xs:
        return b''
    return struct.pack('%dB' % len(xs), *xs)
class LockingUnsupportedError(OSError):
    """Raised when file locking is not available"""
    msg = 'File locking is not supported'

    def __init__(self):
        super().__init__(self.msg)
2059 # Cross-platform file locking
2060 if sys
.platform
== 'win32':
2062 import ctypes
.wintypes
2065 class OVERLAPPED(ctypes
.Structure
):
2067 ('Internal', ctypes
.wintypes
.LPVOID
),
2068 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
2069 ('Offset', ctypes
.wintypes
.DWORD
),
2070 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
2071 ('hEvent', ctypes
.wintypes
.HANDLE
),
2074 kernel32
= ctypes
.WinDLL('kernel32')
2075 LockFileEx
= kernel32
.LockFileEx
2076 LockFileEx
.argtypes
= [
2077 ctypes
.wintypes
.HANDLE
, # hFile
2078 ctypes
.wintypes
.DWORD
, # dwFlags
2079 ctypes
.wintypes
.DWORD
, # dwReserved
2080 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
2081 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
2082 ctypes
.POINTER(OVERLAPPED
) # Overlapped
2084 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
2085 UnlockFileEx
= kernel32
.UnlockFileEx
2086 UnlockFileEx
.argtypes
= [
2087 ctypes
.wintypes
.HANDLE
, # hFile
2088 ctypes
.wintypes
.DWORD
, # dwReserved
2089 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
2090 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
2091 ctypes
.POINTER(OVERLAPPED
) # Overlapped
2093 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
2094 whole_low
= 0xffffffff
2095 whole_high
= 0x7fffffff
2097 def _lock_file(f
, exclusive
, block
):
2098 overlapped
= OVERLAPPED()
2099 overlapped
.Offset
= 0
2100 overlapped
.OffsetHigh
= 0
2101 overlapped
.hEvent
= 0
2102 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
2104 if not LockFileEx(msvcrt
.get_osfhandle(f
.fileno()),
2105 (0x2 if exclusive
else 0x0) |
(0x0 if block
else 0x1),
2106 0, whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
2107 # NB: No argument form of "ctypes.FormatError" does not work on PyPy
2108 raise BlockingIOError(f
'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2110 def _unlock_file(f
):
2111 assert f
._lock
_file
_overlapped
_p
2112 handle
= msvcrt
.get_osfhandle(f
.fileno())
2113 if not UnlockFileEx(handle
, 0, whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
2114 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
2120 def _lock_file(f
, exclusive
, block
):
2121 flags
= fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
2123 flags |
= fcntl
.LOCK_NB
2125 fcntl
.flock(f
, flags
)
2126 except BlockingIOError
:
2128 except OSError: # AOSP does not have flock()
2129 fcntl
.lockf(f
, flags
)
2131 def _unlock_file(f
):
2132 with contextlib
.suppress(OSError):
2133 return fcntl
.flock(f
, fcntl
.LOCK_UN
)
2134 with contextlib
.suppress(OSError):
2135 return fcntl
.lockf(f
, fcntl
.LOCK_UN
) # AOSP does not have flock()
2136 return fcntl
.flock(f
, fcntl
.LOCK_UN | fcntl
.LOCK_NB
) # virtiofs needs LOCK_NB on unlocking
2140 def _lock_file(f
, exclusive
, block
):
2141 raise LockingUnsupportedError()
2143 def _unlock_file(f
):
2144 raise LockingUnsupportedError()
2150 def __init__(self
, filename
, mode
, block
=True, encoding
=None):
2151 if mode
not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}
:
2152 raise NotImplementedError(mode
)
2153 self
.mode
, self
.block
= mode
, block
2155 writable
= any(f
in mode
for f
in 'wax+')
2156 readable
= any(f
in mode
for f
in 'r+')
2157 flags
= functools
.reduce(operator
.ior
, (
2158 getattr(os
, 'O_CLOEXEC', 0), # UNIX only
2159 getattr(os
, 'O_BINARY', 0), # Windows only
2160 getattr(os
, 'O_NOINHERIT', 0), # Windows only
2161 os
.O_CREAT
if writable
else 0, # O_TRUNC only after locking
2162 os
.O_APPEND
if 'a' in mode
else 0,
2163 os
.O_EXCL
if 'x' in mode
else 0,
2164 os
.O_RDONLY
if not writable
else os
.O_RDWR
if readable
else os
.O_WRONLY
,
2167 self
.f
= os
.fdopen(os
.open(filename
, flags
, 0o666), mode
, encoding
=encoding
)
2169 def __enter__(self
):
2170 exclusive
= 'r' not in self
.mode
2172 _lock_file(self
.f
, exclusive
, self
.block
)
2177 if 'w' in self
.mode
:
2180 except OSError as e
:
2182 errno
.ESPIPE
, # Illegal seek - expected for FIFO
2183 errno
.EINVAL
, # Invalid argument - expected for /dev/null
2192 _unlock_file(self
.f
)
2196 def __exit__(self
, *_
):
2205 def __getattr__(self
, attr
):
2206 return getattr(self
.f
, attr
)
def get_filesystem_encoding():
    """Return the filesystem encoding reported by Python, or 'utf-8' if none is reported"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
2218 def shell_quote(args
):
2220 encoding
= get_filesystem_encoding()
2222 if isinstance(a
, bytes):
2223 # We may get a filename encoded with 'encodeFilename'
2224 a
= a
.decode(encoding
)
2225 quoted_args
.append(compat_shlex_quote(a
))
2226 return ' '.join(quoted_args
)
2229 def smuggle_url(url
, data
):
2230 """ Pass additional data in a URL for internal use. """
2232 url
, idata
= unsmuggle_url(url
, {})
2234 sdata
= urllib
.parse
.urlencode(
2235 {'__youtubedl_smuggle': json.dumps(data)}
)
2236 return url
+ '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into the plain URL and the data carried in its fragment.

    Returns (smug_url, default) when the URL carries no smuggled data.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
2248 def format_decimal_suffix(num
, fmt
='%d%s', *, factor
=1000):
2249 """ Formats numbers with decimal sufixes like K, M, etc """
2250 num
, factor
= float_or_none(num
), float(factor
)
2251 if num
is None or num
< 0:
2253 POSSIBLE_SUFFIXES
= 'kMGTPEZY'
2254 exponent
= 0 if num
== 0 else min(int(math
.log(num
, factor
)), len(POSSIBLE_SUFFIXES
))
2255 suffix
= ['', *POSSIBLE_SUFFIXES
][exponent
]
2257 suffix
= {'k': 'Ki', '': ''}
.get(suffix
, f
'{suffix}i')
2258 converted
= num
/ (factor
** exponent
)
2259 return fmt
% (converted
, suffix
)
def format_bytes(bytes):
    """Format a byte count with a 1024-based suffix; 'N/A' when the formatter gives a falsy result"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
2266 def lookup_unit_table(unit_table
, s
, strict
=False):
2267 num_re
= NUMBER_RE
if strict
else NUMBER_RE
.replace(R
'\.', '[,.]')
2268 units_re
= '|'.join(re
.escape(u
) for u
in unit_table
)
2269 m
= (re
.fullmatch
if strict
else re
.match
)(
2270 rf
'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s
)
2274 num
= float(m
.group('num').replace(',', '.'))
2275 mult
= unit_table
[m
.group('unit')]
2276 return round(num
* mult
)
2280 """Parse a string indicating a byte quantity into an integer"""
2281 return lookup_unit_table(
2282 {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])}
,
2283 s
.upper(), strict
=True)
2286 def parse_filesize(s
):
2290 # The lower-case forms are of course incorrect and unofficial,
2291 # but we support those too
2308 'megabytes': 1000 ** 2,
2309 'mebibytes': 1024 ** 2,
2315 'gigabytes': 1000 ** 3,
2316 'gibibytes': 1024 ** 3,
2322 'terabytes': 1000 ** 4,
2323 'tebibytes': 1024 ** 4,
2329 'petabytes': 1000 ** 5,
2330 'pebibytes': 1024 ** 5,
2336 'exabytes': 1000 ** 6,
2337 'exbibytes': 1024 ** 6,
2343 'zettabytes': 1000 ** 7,
2344 'zebibytes': 1024 ** 7,
2350 'yottabytes': 1000 ** 8,
2351 'yobibytes': 1024 ** 8,
2354 return lookup_unit_table(_UNIT_TABLE
, s
)
2361 s
= re
.sub(r
'^[^\d]+\s', '', s
).strip()
2363 if re
.match(r
'^[\d,.]+$', s
):
2364 return str_to_int(s
)
2377 ret
= lookup_unit_table(_UNIT_TABLE
, s
)
2381 mobj
= re
.match(r
'([\d,.]+)(?:$|\s)', s
)
2383 return str_to_int(mobj
.group(1))
def parse_resolution(s, *, lenient=False):
    """Extract width/height information from a string.

    Recognizes "<W>x<H>" (also with X, × or , as separator), "<H>p"/"<H>i"
    and "4k"/"8k". Returns a dict with 'width' and/or 'height', or an empty
    dict if `s` is None or nothing is recognized.

    @param lenient  Allow "<W>x<H>" to be directly adjacent to letters/digits
    """
    if s is None:
        return {}

    if lenient:
        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
    else:
        mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
def parse_bitrate(s):
    """Return the integer preceding "kbps" in `s`, or None if `s` is not a str or has no match"""
    if not isinstance(s, str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    if mobj:
        return int(mobj.group(1))
    return None
2419 def month_by_name(name
, lang
='en'):
2420 """ Return the number of a month by (locale-independently) English name """
2422 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
2425 return month_names
.index(name
) + 1
2430 def month_by_abbreviation(abbrev
):
2431 """ Return the number of a month by (locale-independently) English
2435 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML

    Ampersands that already start a known entity or a numeric character
    reference are left untouched"""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
2448 def setproctitle(title
):
2449 assert isinstance(title
, str)
2451 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2458 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
2462 # LoadLibrary in Windows Python 2.7.13 only expects
2463 # a bytestring, but since unicode_literals turns
2464 # every string into a unicode string, it fails.
2466 title_bytes
= title
.encode()
2467 buf
= ctypes
.create_string_buffer(len(title_bytes
))
2468 buf
.value
= title_bytes
2470 libc
.prctl(15, buf
, 0, 0, 0)
2471 except AttributeError:
2472 return # Strange libc, just skip this
def remove_start(s, start):
    """Drop the prefix `start` from `s` if present; None is passed through unchanged"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Drop the suffix `end` from `s` if present; None is passed through unchanged"""
    # An empty `end` must be special-cased: s[:-0] would yield '' instead of s
    return s[:-len(end)] if s is not None and end and s.endswith(end) else s
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding `s`, if any"""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def get_domain(url):
    """
    Return the network location of `url` without a leading "www.", or None if empty

    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading/trailing slashes"""
    url_path = urllib.parse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
2506 return re
.match(r
'https?://[^?#]+/', url
).group()
def urljoin(base, path):
    """Join `path` onto `base`, tolerating bytes and invalid input.

    Returns None if `path` is empty or not a string, `path` itself if it is
    already absolute or protocol-relative, and None if `base` is not an
    http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
class HEADRequest(urllib.request.Request):
    """Request that always uses the HEAD method"""

    def get_method(self):
        return 'HEAD'
class PUTRequest(urllib.request.Request):
    """Request that always uses the PUT method"""

    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` to int, returning `default` if the conversion fails.

    @param scale     the result is floor-divided by this
    @param invscale  the result is multiplied by this (before dividing)
    @param get_attr  if set, convert this attribute of `v` instead of `v`
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
def str_or_none(v, default=None):
    """Return str(v), or `default` when `v` is None"""
    if v is None:
        return default
    return str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Ints are returned as-is; strings have ',', '.' and '+' removed before
    conversion; anything else yields None """
    if isinstance(int_str, int):
        return int_str
    elif isinstance(int_str, str):
        int_str = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(int_str)
    return None
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to float, returning `default` if `v` is None or not convertible.

    The converted value is multiplied by `invscale` and divided by `scale`.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Return `v` only if it is an actual bool, otherwise `default`"""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace if it is a str, otherwise `default`"""
    if not isinstance(v, str):
        return default
    return v.strip()
def url_or_none(url):
    """Return the stripped `url` if it looks like a URL with a supported scheme, else None.

    Accepted are protocol-relative URLs and the http(s), rtmp*/rtmfp,
    rtsp*, mms and ftp(s) schemes.
    """
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def request_to_url(req):
    """Return the full URL of a urllib Request; any other value is returned unchanged"""
    if isinstance(req, urllib.request.Request):
        return req.get_full_url()
    return req
2587 def strftime_or_none(timestamp
, date_format
, default
=None):
2588 datetime_object
= None
2590 if isinstance(timestamp
, (int, float)): # unix timestamp
2591 # Using naive datetime here can break timestamp() in Windows
2592 # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
2593 datetime_object
= datetime
.datetime
.fromtimestamp(timestamp
, datetime
.timezone
.utc
)
2594 elif isinstance(timestamp
, str): # assume YYYYMMDD
2595 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
2596 date_format
= re
.sub( # Support %s on windows
2597 r
'(?<!%)(%%)*%s', rf
'\g<1>{int(datetime_object.timestamp())}', date_format
)
2598 return datetime_object
.strftime(date_format
)
2599 except (ValueError, TypeError, AttributeError):
2603 def parse_duration(s
):
2604 if not isinstance(s
, str):
2610 days
, hours
, mins
, secs
, ms
= [None] * 5
2611 m
= re
.match(r
'''(?x)
2613 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2614 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2615 (?P<ms>[.:][0-9]+)?Z?$
2618 days
, hours
, mins
, secs
, ms
= m
.group('days', 'hours', 'mins', 'secs', 'ms')
2623 [0-9]+\s*y(?:ears?)?,?\s*
2626 [0-9]+\s*m(?:onths?)?,?\s*
2629 [0-9]+\s*w(?:eeks?)?,?\s*
2632 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2636 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2639 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2642 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2645 days
, hours
, mins
, secs
, ms
= m
.groups()
2647 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
2649 hours
, mins
= m
.groups()
2654 ms
= ms
.replace(':', '.')
2655 return sum(float(part
or 0) * mult
for part
, mult
in (
2656 (days
, 86400), (hours
, 3600), (mins
, 60), (secs
, 1), (ms
, 1)))
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension of `filename`.

    If `expected_real_ext` is given and differs from the real extension,
    `ext` is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    return (
        f'{name}.{ext}{real_ext}'
        if not expected_real_ext or real_ext[1:] == expected_real_ext
        else f'{filename}.{ext}')
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the real extension of `filename` with `ext`.

    If `expected_real_ext` is given and differs from the real extension,
    `ext` is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    return '{}.{}'.format(
        name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
        ext)
2674 def check_executable(exe
, args
=[]):
2675 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2676 args can be a list of arguments for a short output (like -version) """
2678 Popen
.run([exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
2684 def _get_exe_version_output(exe
, args
):
2686 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2687 # SIGTTOU if yt-dlp is run in the background.
2688 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2689 stdout
, _
, ret
= Popen
.run([encodeArgument(exe
)] + args
, text
=True,
2690 stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
)
2698 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
2699 assert isinstance(output
, str)
2700 if version_re
is None:
2701 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
2702 m
= re
.search(version_re
, output
)
2709 def get_exe_version(exe
, args
=['--version'],
2710 version_re
=None, unrecognized
=('present', 'broken')):
2711 """ Returns the version of the specified executable,
2712 or False if the executable is not present """
2713 unrecognized
= variadic(unrecognized
)
2714 assert len(unrecognized
) in (1, 2)
2715 out
= _get_exe_version_output(exe
, args
)
2717 return unrecognized
[-1]
2718 return out
and detect_exe_version(out
, version_re
, unrecognized
[0])
def frange(start=0, stop=None, step=1):
    """Float range

    Like range(), but also accepts non-integer (and infinite) bounds and
    steps. A step of zero yields nothing.
    """
    if stop is None:
        start, stop = 0, start
    sign = [-1, 1][step > 0] if step else 0
    while sign * start < sign * stop:
        yield start
        start += step
2731 class LazyList(collections
.abc
.Sequence
):
2732 """Lazy immutable list from an iterable
2733 Note that slices of a LazyList are lists and not LazyList"""
2735 class IndexError(IndexError):
2738 def __init__(self
, iterable
, *, reverse
=False, _cache
=None):
2739 self
._iterable
= iter(iterable
)
2740 self
._cache
= [] if _cache
is None else _cache
2741 self
._reversed
= reverse
2745 # We need to consume the entire iterable to iterate in reverse
2746 yield from self
.exhaust()
2748 yield from self
._cache
2749 for item
in self
._iterable
:
2750 self
._cache
.append(item
)
2754 self
._cache
.extend(self
._iterable
)
2755 self
._iterable
= [] # Discard the emptied iterable to make it pickle-able
2759 """Evaluate the entire iterable"""
2760 return self
._exhaust
()[::-1 if self
._reversed
else 1]
2763 def _reverse_index(x
):
2764 return None if x
is None else ~x
2766 def __getitem__(self
, idx
):
2767 if isinstance(idx
, slice):
2769 idx
= slice(self
._reverse
_index
(idx
.start
), self
._reverse
_index
(idx
.stop
), -(idx
.step
or 1))
2770 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
2771 elif isinstance(idx
, int):
2773 idx
= self
._reverse
_index
(idx
)
2774 start
, stop
, step
= idx
, idx
, 0
2776 raise TypeError('indices must be integers or slices')
2777 if ((start
or 0) < 0 or (stop
or 0) < 0
2778 or (start
is None and step
< 0)
2779 or (stop
is None and step
> 0)):
2780 # We need to consume the entire iterable to be able to slice from the end
2781 # Obviously, never use this with infinite iterables
2784 return self
._cache
[idx
]
2785 except IndexError as e
:
2786 raise self
.IndexError(e
) from e
2787 n
= max(start
or 0, stop
or 0) - len(self
._cache
) + 1
2789 self
._cache
.extend(itertools
.islice(self
._iterable
, n
))
2791 return self
._cache
[idx
]
2792 except IndexError as e
:
2793 raise self
.IndexError(e
) from e
2797 self
[-1] if self
._reversed
else self
[0]
2798 except self
.IndexError:
2804 return len(self
._cache
)
2806 def __reversed__(self
):
2807 return type(self
)(self
._iterable
, reverse
=not self
._reversed
, _cache
=self
._cache
)
2810 return type(self
)(self
._iterable
, reverse
=self
._reversed
, _cache
=self
._cache
)
2813 # repr and str should mimic a list. So we exhaust the iterable
2814 return repr(self
.exhaust())
2817 return repr(self
.exhaust())
2822 class IndexError(IndexError):
2826 # This is only useful for tests
2827 return len(self
.getslice())
2829 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
2830 self
._pagefunc
= pagefunc
2831 self
._pagesize
= pagesize
2832 self
._pagecount
= float('inf')
2833 self
._use
_cache
= use_cache
2836 def getpage(self
, pagenum
):
2837 page_results
= self
._cache
.get(pagenum
)
2838 if page_results
is None:
2839 page_results
= [] if pagenum
> self
._pagecount
else list(self
._pagefunc
(pagenum
))
2841 self
._cache
[pagenum
] = page_results
2844 def getslice(self
, start
=0, end
=None):
2845 return list(self
._getslice
(start
, end
))
2847 def _getslice(self
, start
, end
):
2848 raise NotImplementedError('This method must be implemented by subclasses')
2850 def __getitem__(self
, idx
):
2851 assert self
._use
_cache
, 'Indexing PagedList requires cache'
2852 if not isinstance(idx
, int) or idx
< 0:
2853 raise TypeError('indices must be non-negative integers')
2854 entries
= self
.getslice(idx
, idx
+ 1)
2856 raise self
.IndexError()
2860 class OnDemandPagedList(PagedList
):
2861 """Download pages until a page with less than maximum results"""
2863 def _getslice(self
, start
, end
):
2864 for pagenum
in itertools
.count(start
// self
._pagesize
):
2865 firstid
= pagenum
* self
._pagesize
2866 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
2867 if start
>= nextfirstid
:
2871 start
% self
._pagesize
2872 if firstid
<= start
< nextfirstid
2875 ((end
- 1) % self
._pagesize
) + 1
2876 if (end
is not None and firstid
<= end
<= nextfirstid
)
2880 page_results
= self
.getpage(pagenum
)
2882 self
._pagecount
= pagenum
- 1
2884 if startv
!= 0 or endv
is not None:
2885 page_results
= page_results
[startv
:endv
]
2886 yield from page_results
2888 # A little optimization - if current page is not "full", ie. does
2889 # not contain page_size videos then we can assume that this page
2890 # is the last one - there are no more ids on further pages -
2891 # i.e. no need to query again.
2892 if len(page_results
) + startv
< self
._pagesize
:
2895 # If we got the whole page, but the next page is not interesting,
2896 # break out early as well
2897 if end
== nextfirstid
:
2901 class InAdvancePagedList(PagedList
):
2902 """PagedList with total number of pages known in advance"""
2904 def __init__(self
, pagefunc
, pagecount
, pagesize
):
2905 PagedList
.__init
__(self
, pagefunc
, pagesize
, True)
2906 self
._pagecount
= pagecount
2908 def _getslice(self
, start
, end
):
2909 start_page
= start
// self
._pagesize
2910 end_page
= self
._pagecount
if end
is None else min(self
._pagecount
, end
// self
._pagesize
+ 1)
2911 skip_elems
= start
- start_page
* self
._pagesize
2912 only_more
= None if end
is None else end
- start
2913 for pagenum
in range(start_page
, end_page
):
2914 page_results
= self
.getpage(pagenum
)
2916 page_results
= page_results
[skip_elems
:]
2918 if only_more
is not None:
2919 if len(page_results
) < only_more
:
2920 only_more
-= len(page_results
)
2922 yield from page_results
[:only_more
]
2924 yield from page_results
2927 class PlaylistEntries
:
2928 MissingEntry
= object()
2929 is_exhausted
= False
2931 def __init__(self
, ydl
, info_dict
):
2934 # _entries must be assigned now since infodict can change during iteration
2935 entries
= info_dict
.get('entries')
2937 raise EntryNotInPlaylist('There are no entries')
2938 elif isinstance(entries
, list):
2939 self
.is_exhausted
= True
2941 requested_entries
= info_dict
.get('requested_entries')
2942 self
.is_incomplete
= requested_entries
is not None
2943 if self
.is_incomplete
:
2944 assert self
.is_exhausted
2945 self
._entries
= [self
.MissingEntry
] * max(requested_entries
or [0])
2946 for i
, entry
in zip(requested_entries
, entries
):
2947 self
._entries
[i
- 1] = entry
2948 elif isinstance(entries
, (list, PagedList
, LazyList
)):
2949 self
._entries
= entries
2951 self
._entries
= LazyList(entries
)
2953 PLAYLIST_ITEMS_RE
= re
.compile(r
'''(?x)
2954 (?P<start>[+-]?\d+)?
2956 (?P<end>[+-]?\d+|inf(?:inite)?)?
2957 (?::(?P<step>[+-]?\d+))?
2961 def parse_playlist_items(cls
, string
):
2962 for segment
in string
.split(','):
2964 raise ValueError('There is two or more consecutive commas')
2965 mobj
= cls
.PLAYLIST_ITEMS_RE
.fullmatch(segment
)
2967 raise ValueError(f
'{segment!r} is not a valid specification')
2968 start
, end
, step
, has_range
= mobj
.group('start', 'end', 'step', 'range')
2969 if int_or_none(step
) == 0:
2970 raise ValueError(f
'Step in {segment!r} cannot be zero')
2971 yield slice(int_or_none(start
), float_or_none(end
), int_or_none(step
)) if has_range
else int(start
)
2973 def get_requested_items(self
):
2974 playlist_items
= self
.ydl
.params
.get('playlist_items')
2975 playlist_start
= self
.ydl
.params
.get('playliststart', 1)
2976 playlist_end
= self
.ydl
.params
.get('playlistend')
2977 # For backwards compatibility, interpret -1 as whole list
2978 if playlist_end
in (-1, None):
2980 if not playlist_items
:
2981 playlist_items
= f
'{playlist_start}:{playlist_end}'
2982 elif playlist_start
!= 1 or playlist_end
:
2983 self
.ydl
.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once
=True)
2985 for index
in self
.parse_playlist_items(playlist_items
):
2986 for i
, entry
in self
[index
]:
2991 # The item may have just been added to archive. Don't break due to it
2992 if not self
.ydl
.params
.get('lazy_playlist'):
2993 # TODO: Add auto-generated fields
2994 self
.ydl
._match
_entry
(entry
, incomplete
=True, silent
=True)
2995 except (ExistingVideoReached
, RejectedVideoReached
):
2998 def get_full_count(self
):
2999 if self
.is_exhausted
and not self
.is_incomplete
:
3001 elif isinstance(self
._entries
, InAdvancePagedList
):
3002 if self
._entries
._pagesize
== 1:
3003 return self
._entries
._pagecount
3005 @functools.cached_property
3007 if isinstance(self
._entries
, list):
3010 entry
= self
._entries
[i
]
3012 entry
= self
.MissingEntry
3013 if not self
.is_incomplete
:
3014 raise self
.IndexError()
3015 if entry
is self
.MissingEntry
:
3016 raise EntryNotInPlaylist(f
'Entry {i + 1} cannot be found')
3021 return type(self
.ydl
)._handle
_extraction
_exceptions
(lambda _
, i
: self
._entries
[i
])(self
.ydl
, i
)
3022 except (LazyList
.IndexError, PagedList
.IndexError):
3023 raise self
.IndexError()
3026 def __getitem__(self
, idx
):
3027 if isinstance(idx
, int):
3028 idx
= slice(idx
, idx
)
3030 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3031 step
= 1 if idx
.step
is None else idx
.step
3032 if idx
.start
is None:
3033 start
= 0 if step
> 0 else len(self
) - 1
3035 start
= idx
.start
- 1 if idx
.start
>= 0 else len(self
) + idx
.start
3037 # NB: Do not call len(self) when idx == [:]
3038 if idx
.stop
is None:
3039 stop
= 0 if step
< 0 else float('inf')
3041 stop
= idx
.stop
- 1 if idx
.stop
>= 0 else len(self
) + idx
.stop
3042 stop
+= [-1, 1][step
> 0]
3044 for i
in frange(start
, stop
, step
):
3048 entry
= self
._getter
(i
)
3049 except self
.IndexError:
3050 self
.is_exhausted
= True
3057 return len(tuple(self
[:]))
3059 class IndexError(IndexError):
3063 def uppercase_escape(s
):
3064 unicode_escape
= codecs
.getdecoder('unicode_escape')
3066 r
'\\U[0-9a-fA-F]{8}',
3067 lambda m
: unicode_escape(m
.group(0))[0],
3071 def lowercase_escape(s
):
3072 unicode_escape
= codecs
.getdecoder('unicode_escape')
3074 r
'\\u[0-9a-fA-F]{4}',
3075 lambda m
: unicode_escape(m
.group(0))[0],
def escape_rfc3986(s):
    """Percent-encode `s`, leaving the URL delimiter characters listed below untouched"""
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return urllib.parse.quote(s, safe_chars)
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host is IDNA-encoded; path, params, query and fragment are
    percent-encoded with escape_rfc3986."""
    url_parsed = urllib.parse.urlparse(url)
    return url_parsed._replace(
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment),
    ).geturl()
def parse_qs(url, **kwargs):
    """Parse the query string of `url`; extra keyword arguments go to urllib.parse.parse_qs"""
    query_string = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query_string, **kwargs)
3100 def read_batch_urls(batch_fd
):
3102 if not isinstance(url
, str):
3103 url
= url
.decode('utf-8', 'replace')
3104 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
3105 for bom
in BOM_UTF8
:
3106 if url
.startswith(bom
):
3107 url
= url
[len(bom
):]
3109 if not url
or url
.startswith(('#', ';', ']')):
3111 # "#" cannot be stripped out since it is part of the URI
3112 # However, it can be safely stripped out if following a whitespace
3113 return re
.split(r
'\s#', url
, 1)[0].rstrip()
3115 with contextlib
.closing(batch_fd
) as fd
:
3116 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments with urllib.parse.urlencode and return the result as ASCII bytes"""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
3123 def update_url(url
, *, query_update
=None, **kwargs
):
3124 """Replace URL components specified by kwargs
3125 @param url str or parse url tuple
3126 @param query_update update query
3129 if isinstance(url
, str):
3130 if not kwargs
and not query_update
:
3133 url
= urllib
.parse
.urlparse(url
)
3135 assert 'query' not in kwargs
, 'query_update and query cannot be specified at the same time'
3136 kwargs
['query'] = urllib
.parse
.urlencode({
3137 **urllib
.parse
.parse_qs(url
.query
),
3140 return urllib
.parse
.urlunparse(url
._replace
(**kwargs
))
def update_url_query(url, query):
    """Shorthand for update_url() that only updates the query of `url` with `query`"""
    updated_url = update_url(url, query_update=query)
    return updated_url
3147 def update_Request(req
, url
=None, data
=None, headers
=None, query
=None):
3148 req_headers
= req
.headers
.copy()
3149 req_headers
.update(headers
or {})
3150 req_data
= data
or req
.data
3151 req_url
= update_url_query(url
or req
.get_full_url(), query
)
3152 req_get_method
= req
.get_method()
3153 if req_get_method
== 'HEAD':
3154 req_type
= HEADRequest
3155 elif req_get_method
== 'PUT':
3156 req_type
= PUTRequest
3158 req_type
= urllib
.request
.Request
3160 req_url
, data
=req_data
, headers
=req_headers
,
3161 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
3162 if hasattr(req
, 'timeout'):
3163 new_req
.timeout
= req
.timeout
3167 def _multipart_encode_impl(data
, boundary
):
3168 content_type
= 'multipart/form-data; boundary=%s' % boundary
3171 for k
, v
in data
.items():
3172 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
3173 if isinstance(k
, str):
3175 if isinstance(v
, str):
3177 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3178 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3179 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
3180 if boundary
.encode('ascii') in content
:
3181 raise ValueError('Boundary overlaps with data')
3184 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
3186 return out
, content_type
3189 def multipart_encode(data
, boundary
=None):
3191 Encode a dict to RFC 7578-compliant form-data
3194 A dict where keys and values can be either Unicode or bytes-like
3197 If specified a Unicode object, it's used as the boundary. Otherwise
3198 a random boundary is generated.
3200 Reference: https://tools.ietf.org/html/rfc7578
3202 has_specified_boundary
= boundary
is not None
3205 if boundary
is None:
3206 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
3209 out
, content_type
= _multipart_encode_impl(data
, boundary
)
3212 if has_specified_boundary
:
3216 return out
, content_type
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Tell whether `x` is an instance of `allowed_types` but not of `blocked_types`.

    When `blocked_types` is left at NO_DEFAULT, str, bytes and mappings are
    the blocked types.
    """
    excluded = (str, bytes, collections.abc.Mapping) if blocked_types is NO_DEFAULT else blocked_types
    # Check the allowed types first so the blocked-types check is only
    # evaluated for values that already qualify (same short-circuit order).
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, excluded)
def variadic(x, allowed_types=NO_DEFAULT):
    """Return `x` as-is when it is iterable-like, else wrap it in a 1-tuple.

    `allowed_types` is handed to is_iterable_like() as its `blocked_types`,
    i.e. values of these types are treated as single items and get wrapped.
    """
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x, )
3229 def try_call(*funcs
, expected_type
=None, args
=[], kwargs
={}):
3232 val
= f(*args
, **kwargs
)
3233 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3236 if expected_type
is None or isinstance(val
, expected_type
):
def try_get(src, getter, expected_type=None):
    """Run `getter` (a single callable or several) on `src` through try_call().

    Each getter is called with `src` as its only argument; `expected_type`
    is forwarded to try_call() unchanged.
    """
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict holding the items of `dct` for which cndn(key, value) is truthy.

    With the default condition, items whose value is None are dropped.
    """
    kept = {}
    for key, value in dct.items():
        if cndn(key, value):
            kept[key] = value
    return kept
3248 def merge_dicts(*dicts
):
3250 for a_dict
in dicts
:
3251 for k
, v
in a_dict
.items():
3252 if (v
is not None and k
not in merged
3253 or isinstance(v
, str) and merged
[k
] == ''):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as a str, decoding it with `encoding`/`errors` if it is not one already."""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
3271 TV_PARENTAL_GUIDELINES
= {
3281 def parse_age_limit(s
):
3282 # isinstance(False, int) is True. So type() must be used instead
3283 if type(s
) is int: # noqa: E721
3284 return s
if 0 <= s
<= 21 else None
3285 elif not isinstance(s
, str):
3287 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
3289 return int(m
.group('age'))
3292 return US_RATINGS
[s
]
3293 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
3295 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
3299 def strip_jsonp(code
):
3302 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3303 (?:\s*&&\s*(?P=func_name))?
3304 \s*\(\s*(?P<callback_data>.*)\);?
3305 \s*?(?://[^\n]*)*$''',
3306 r
'\g<callback_data>', code
)
3309 def js_to_json(code
, vars={}, *, strict
=False):
3310 # vars is a dict of var, val pairs to substitute
3311 STRING_QUOTES
= '\'"`'
3312 STRING_RE
= '|'.join(rf
'{q}(?:\\.|[^\\{q}])*{q}' for q
in STRING_QUOTES
)
3313 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3314 SKIP_RE
= fr
'\s*(?:{COMMENT_RE})?\s*'
3316 (fr
'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3317 (fr
'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3320 def process_escape(match
):
3321 JSON_PASSTHROUGH_ESCAPES
= R
'"\bfnrtu'
3322 escape
= match
.group(1) or match
.group(2)
3324 return (Rf
'\{escape}' if escape
in JSON_PASSTHROUGH_ESCAPES
3325 else R
'\u00' if escape
== 'x'
3326 else '' if escape
== '\n'
3329 def template_substitute(match
):
3330 evaluated
= js_to_json(match
.group(1), vars, strict
=strict
)
3331 if evaluated
[0] == '"':
3332 return json
.loads(evaluated
)
3337 if v
in ('true', 'false', 'null'):
3339 elif v
in ('undefined', 'void 0'):
3341 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
3344 if v
[0] in STRING_QUOTES
:
3345 v
= re
.sub(r
'(?s)\${([^}]+)}', template_substitute
, v
[1:-1]) if v
[0] == '`' else v
[1:-1]
3346 escaped
= re
.sub(r
'(?s)(")|\\(.)', process_escape
, v
)
3347 return f
'"{escaped}"'
3349 for regex
, base
in INTEGER_TABLE
:
3350 im
= re
.match(regex
, v
)
3352 i
= int(im
.group(1), base
)
3353 return f
'"{i}":' if v
.endswith(':') else str(i
)
3359 except json
.JSONDecodeError
:
3360 return json
.dumps(vars[v
])
3367 raise ValueError(f
'Unknown value: {v}')
3369 def create_map(mobj
):
3370 return json
.dumps(dict(json
.loads(js_to_json(mobj
.group(1) or '[]', vars=vars))))
3372 code
= re
.sub(r
'new Map\((\[.*?\])?\)', create_map
, code
)
3374 code
= re
.sub(r
'new Date\((".+")\)', r
'\g<1>', code
)
3375 code
= re
.sub(r
'new \w+\((.*?)\)', lambda m
: json
.dumps(m
.group(0)), code
)
3376 code
= re
.sub(r
'parseInt\([^\d]+(\d+)[^\d]+\)', r
'\1', code
)
3377 code
= re
.sub(r
'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^
)]*["\'])\s*\)', r'\1', code)
3379 return re.sub(rf'''(?sx)
3381 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
3382 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3383 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
3384 [0-9]+(?={SKIP_RE}:)|
3389 def qualities(quality_ids):
3390 """ Get a numeric quality value out of a list of possible values """
3393 return quality_ids.index(qid)
3399 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3403 'default': '%(title)s [%(id)s].%(ext)s',
3404 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3410 'description': 'description',
3411 'annotation': 'annotations.xml',
3412 'infojson': 'info.json',
3415 'pl_thumbnail': None,
3416 'pl_description': 'description',
3417 'pl_infojson': 'info.json',
3420 # As of [1] format syntax is:
3421 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3422 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3423 STR_FORMAT_RE_TMPL = r'''(?x)
3424 (?<!%)(?P<prefix>(?:%%)*)
3426 (?P<has_key>\((?P<key>{0})\))?
3428 (?P<conversion>[#0\-+ ]+)?
3430 (?P<precision>\.\d+)?
3431 (?P<len_mod>[hlL])? # unused in python
3432 {1} # conversion type
3437 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3440 def limit_length(s, length):
3441 """ Add ellipses to overly long strings """
3446 return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a version string on "." and "-" and return its parts as a tuple of ints."""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
3454 def is_outdated_version(version, limit, assume_new=True):
3456 return not assume_new
3458 return version_tuple(version) < version_tuple(limit)
3460 return not assume_new
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Imported inside the function, as in the original
    from ..update import is_non_updateable

    non_updateable = is_non_updateable()
    return not non_updateable
def args_to_str(args):
    """Get a short string representation for a subprocess command.

    Every argument is quoted with compat_shlex_quote() and the results are
    joined with single spaces.
    """
    quoted = map(compat_shlex_quote, args)
    return ' '.join(quoted)
def error_to_str(err):
    """Format an exception as "<ClassName>: <message>"."""
    class_name = type(err).__name__
    return '{}: {}'.format(class_name, err)
3480 def mimetype2ext(mt, default=NO_DEFAULT):
3481 if not isinstance(mt, str):
3482 if default is not NO_DEFAULT:
3498 'x-matroska': 'mkv',
3500 'x-mp4-fragmented': 'mp4',
3505 # application (streaming playlists)
3509 'vnd.apple.mpegurl': 'm3u8',
3510 'vnd.ms-sstr+xml': 'ism',
3511 'x-mpegurl': 'm3u8',
3515 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
3516 # Using .mp3 as it's the most popular one
3517 'audio/mpeg': 'mp3',
3518 'audio/webm': 'webm',
3519 'audio/x-matroska': 'mka',
3520 'audio/x-mpegurl': 'm3u',
3528 'x-realaudio': 'ra',
3539 'vnd.wap.wbmp': 'wbmp',
3546 'filmstrip+json': 'fs',
3547 'smptett+xml': 'tt',
3550 'x-ms-sami': 'sami',
3559 mimetype = mt.partition(';')[0].strip().lower()
3560 _, _, subtype = mimetype.rpartition('/')
3562 ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
3565 elif default is not NO_DEFAULT:
3567 return subtype.replace('+', '.')
3570 def ext2mimetype(ext_or_url):
3573 if '.' not in ext_or_url:
3574 ext_or_url = f'file.{ext_or_url}'
3575 return mimetypes.guess_type(ext_or_url)[0]
3578 def parse_codecs(codecs_str):
3579 # http://tools.ietf.org/html/rfc6381
3582 split_codecs = list(filter(None, map(
3583 str.strip, codecs_str.strip().strip(',').split(','))))
3584 vcodec, acodec, scodec, hdr = None, None, None, None
3585 for full_codec in split_codecs:
3586 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3587 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3588 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3592 if parts[0] in ('dvh1', 'dvhe'):
3594 elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
3596 elif parts[:2] == ['vp9', '2']:
3598 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
3599 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3600 acodec = acodec or full_codec
3601 elif parts[0] in ('stpp', 'wvtt'):
3602 scodec = scodec or full_codec
3604 write_string(f'WARNING: Unknown codec {full_codec}\n')
3605 if vcodec or acodec or scodec:
3607 'vcodec': vcodec or 'none',
3608 'acodec': acodec or 'none',
3609 'dynamic_range': hdr,
3610 **({'scodec': scodec} if scodec is not None else {}),
3612 elif len(split_codecs) == 2:
3614 'vcodec': split_codecs[0],
3615 'acodec': split_codecs[1],
3620 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3621 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3623 allow_mkv = not preferences or 'mkv' in preferences
3625 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3626 return 'mkv' # TODO: any other format allows this?
3628 # TODO: All codecs supported by parse_codecs isn't handled here
3629 COMPATIBLE_CODECS = {
3631 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
3632 'h264', 'aacl', 'ec-3', # Set in ISM
3635 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3636 'vp9x', 'vp8x', # in the webm spec
3640 sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3641 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3643 for ext in preferences or COMPATIBLE_CODECS.keys():
3644 codec_set = COMPATIBLE_CODECS.get(ext, set())
3645 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3649 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3652 for ext in preferences or vexts:
3653 current_exts = {ext, *vexts, *aexts}
3654 if ext == 'mkv' or current_exts == {ext} or any(
3655 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3657 return 'mkv' if allow_mkv else preferences[-1]
3660 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
3661 getheader = url_handle.headers.get
3663 cd = getheader('Content-Disposition')
3665 m = re.match(r'attachment;\s*filename="(?P
<filename
>[^
"]+)"', cd)
3667 e = determine_ext(m.group('filename
'), default_ext=None)
3671 meta_ext = getheader('x
-amz
-meta
-name
')
3673 e = meta_ext.rpartition('.')[2]
3677 return mimetype2ext(getheader('Content
-Type
'), default=default)
def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI from the bytes `data` and the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3684 def age_restricted(content_limit, age_limit):
3685 """ Returns True iff the content should be blocked """
3687 if age_limit is None: # No limit set
3689 if content_limit is None:
3690 return False # Content available for everyone
3691 return age_limit < content_limit
3694 # List of known byte-order-marks (BOM)
3696 (b'\xef\xbb\xbf', 'utf
-8'),
3697 (b'\x00\x00\xfe\xff', 'utf
-32-be
'),
3698 (b'\xff\xfe\x00\x00', 'utf
-32-le
'),
3699 (b'\xff\xfe', 'utf
-16-le
'),
3700 (b'\xfe\xff', 'utf
-16-be
'),
3704 def is_html(first_bytes):
3705 """ Detect whether a file contains HTML by examining its first bytes. """
3708 for bom, enc in BOMS:
3709 while first_bytes.startswith(bom):
3710 encoding, first_bytes = enc, first_bytes[len(bom):]
3712 return re.match(r'^\s
*<', first_bytes.decode(encoding, 'replace
'))
3715 def determine_protocol(info_dict):
3716 protocol = info_dict.get('protocol
')
3717 if protocol is not None:
3720 url = sanitize_url(info_dict['url
'])
3721 if url.startswith('rtmp
'):
3723 elif url.startswith('mms
'):
3725 elif url.startswith('rtsp
'):
3728 ext = determine_ext(url)
3730 return 'm3u8
' if info_dict.get('is_live
') else 'm3u8_native
'
3734 return urllib.parse.urlparse(url).scheme
3737 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3738 """ Render a list of rows, each as a list of values.
3739 Text after a \t will be right aligned """
3741 return len(remove_terminal_sequences(string).replace('\t', ''))
3743 def get_max_lens(table):
3744 return [max(width(str(v)) for v in col) for col in zip(*table)]
3746 def filter_using_list(row, filterArray):
3747 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3749 max_lens = get_max_lens(data) if hide_empty else []
3750 header_row = filter_using_list(header_row, max_lens)
3751 data = [filter_using_list(row, max_lens) for row in data]
3753 table = [header_row] + data
3754 max_lens = get_max_lens(table)
3757 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3758 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3760 for pos, text in enumerate(map(str, row)):
3762 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3764 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3765 ret = '\n'.join(''.join(row).rstrip() for row in table)
3769 def _match_one(filter_part, dct, incomplete):
3770 # TODO: Generalize code with YoutubeDL._build_format_filter
3771 STRING_OPERATORS = {
3772 '*=': operator.contains,
3773 '^
=': lambda attr, value: attr.startswith(value),
3774 '$
=': lambda attr, value: attr.endswith(value),
3775 '~
=': lambda attr, value: re.search(value, attr),
3777 COMPARISON_OPERATORS = {
3779 '<=': operator.le, # "<=" must be defined above "<"
3786 if isinstance(incomplete, bool):
3787 is_incomplete = lambda _: incomplete
3789 is_incomplete = lambda k: k in incomplete
3791 operator_rex = re.compile(r'''(?x)
3793 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3795 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3798 ''' % '|
'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3799 m = operator_rex.fullmatch(filter_part.strip())
3802 unnegated_op = COMPARISON_OPERATORS[m['op
']]
3804 op = lambda attr, value: not unnegated_op(attr, value)
3807 comparison_value = m['quotedstrval
'] or m['strval
'] or m['intval
']
3809 comparison_value = comparison_value.replace(r'\
%s' % m['quote
'], m['quote
'])
3810 actual_value = dct.get(m['key
'])
3811 numeric_comparison = None
3812 if isinstance(actual_value, (int, float)):
3813 # If the original field is a string and matching comparisonvalue is
3814 # a number we should respect the origin of the original field
3815 # and process comparison value as a string (see
3816 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3818 numeric_comparison = int(comparison_value)
3820 numeric_comparison = parse_filesize(comparison_value)
3821 if numeric_comparison is None:
3822 numeric_comparison = parse_filesize(f'{comparison_value}B
')
3823 if numeric_comparison is None:
3824 numeric_comparison = parse_duration(comparison_value)
3825 if numeric_comparison is not None and m['op
'] in STRING_OPERATORS:
3826 raise ValueError('Operator
%s only supports string values
!' % m['op
'])
3827 if actual_value is None:
3828 return is_incomplete(m['key
']) or m['none_inclusive
']
3829 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3832 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3833 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3835 operator_rex = re.compile(r'''(?x)
3836 (?P<op>%s)\s*(?P<key>[a-z_]+)
3837 ''' % '|
'.join(map(re.escape, UNARY_OPERATORS.keys())))
3838 m = operator_rex.fullmatch(filter_part.strip())
3840 op = UNARY_OPERATORS[m.group('op
')]
3841 actual_value = dct.get(m.group('key
'))
3842 if is_incomplete(m.group('key
')) and actual_value is None:
3844 return op(actual_value)
3846 raise ValueError('Invalid
filter part
%r' % filter_part)
3849 def match_str(filter_str, dct, incomplete=False):
3850 """ Filter a dictionary with a simple string syntax.
3851 @returns Whether the filter passes
3852 @param incomplete Set of keys that is expected to be missing from dct.
3853 Can be True/False to indicate all/none of the keys may be missing.
3854 All conditions on incomplete keys pass if the key is missing
3857 _match_one(filter_part.replace(r'\
&', '&'), dct, incomplete)
3858 for filter_part in re.split(r'(?
<!\\)&', filter_str))
3861 def match_filter_func(filters, breaking_filters=None):
3862 if not filters and not breaking_filters:
3864 breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
3865 filters = set(variadic(filters or []))
3867 interactive = '-' in filters
3871 def _match_func(info_dict, incomplete=False):
3872 ret = breaking_filters(info_dict, incomplete)
3874 raise RejectedVideoReached(ret)
3876 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3877 return NO_DEFAULT if interactive and not incomplete else None
3879 video_title = info_dict.get('title
') or info_dict.get('id') or 'entry
'
3880 filter_str = ') |
('.join(map(str.strip, filters))
3881 return f'{video_title} does
not pass filter ({filter_str}
), skipping
..'
3885 class download_range_func:
3886 def __init__(self, chapters, ranges):
3887 self.chapters, self.ranges = chapters, ranges
3889 def __call__(self, info_dict, ydl):
3890 if not self.ranges and not self.chapters:
3893 warning = ('There are no chapters matching the regex
' if info_dict.get('chapters
')
3894 else 'Cannot match chapters since chapter information
is unavailable
')
3895 for regex in self.chapters or []:
3896 for i, chapter in enumerate(info_dict.get('chapters
') or []):
3897 if re.search(regex, chapter['title
']):
3899 yield {**chapter, 'index': i}
3900 if self.chapters and warning:
3901 ydl.to_screen(f'[info
] {info_dict["id"]}
: {warning}
')
3903 yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3905 def __eq__(self, other):
3906 return (isinstance(other, download_range_func)
3907 and self.chapters == other.chapters and self.ranges == other.ranges)
3910 return f'{__name__}
.{type(self).__name__}
({self.chapters}
, {self.ranges}
)'
3913 def parse_dfxp_time_expr(time_expr):
3917 mobj = re.match(rf'^
(?P
<time_offset
>{NUMBER_RE}
)s?$
', time_expr)
3919 return float(mobj.group('time_offset
'))
3921 mobj = re.match(r'^
(\d
+):(\d\d
):(\d\
d(?
:(?
:\
.|
:)\d
+)?
)$
', time_expr)
3923 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format `seconds` as an SRT timecode of the form HH:MM:SS,mmm.

    The four fields come from timetuple_from_msec(seconds * 1000).
    """
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
def ass_subtitles_timecode(seconds):
    """Format `seconds` as an ASS timecode of the form H:MM:SS.cc.

    The last field is the millisecond part divided by 10 (hundredths of a second).
    """
    parts = timetuple_from_msec(seconds * 1000)
    hundredths = parts.milliseconds / 10
    # Every element except the last one is used as-is; the last is replaced by hundredths
    return '%01d:%02d:%02d.%02d' % (*parts[:-1], hundredths)
3935 def dfxp2srt(dfxp_data):
3937 @param dfxp_data A bytes-like object containing DFXP data
3938 @returns A unicode object containing converted SRT data
3940 LEGACY_NAMESPACES = (
3941 (b'http
://www
.w3
.org
/ns
/ttml
', [
3942 b'http
://www
.w3
.org
/2004/11/ttaf1
',
3943 b'http
://www
.w3
.org
/2006/04/ttaf1
',
3944 b'http
://www
.w3
.org
/2006/10/ttaf1
',
3946 (b'http
://www
.w3
.org
/ns
/ttml
#styling', [
3947 b
'http://www.w3.org/ns/ttml#style',
3951 SUPPORTED_STYLING
= [
3960 _x
= functools
.partial(xpath_with_ns
, ns_map
={
3961 'xml': 'http://www.w3.org/XML/1998/namespace',
3962 'ttml': 'http://www.w3.org/ns/ttml',
3963 'tts': 'http://www.w3.org/ns/ttml#styling',
3969 class TTMLPElementParser
:
3971 _unclosed_elements
= []
3972 _applied_styles
= []
3974 def start(self
, tag
, attrib
):
3975 if tag
in (_x('ttml:br'), 'br'):
3978 unclosed_elements
= []
3980 element_style_id
= attrib
.get('style')
3982 style
.update(default_style
)
3983 if element_style_id
:
3984 style
.update(styles
.get(element_style_id
, {}))
3985 for prop
in SUPPORTED_STYLING
:
3986 prop_val
= attrib
.get(_x('tts:' + prop
))
3988 style
[prop
] = prop_val
3991 for k
, v
in sorted(style
.items()):
3992 if self
._applied
_styles
and self
._applied
_styles
[-1].get(k
) == v
:
3995 font
+= ' color="%s"' % v
3996 elif k
== 'fontSize':
3997 font
+= ' size="%s"' % v
3998 elif k
== 'fontFamily':
3999 font
+= ' face="%s"' % v
4000 elif k
== 'fontWeight' and v
== 'bold':
4002 unclosed_elements
.append('b')
4003 elif k
== 'fontStyle' and v
== 'italic':
4005 unclosed_elements
.append('i')
4006 elif k
== 'textDecoration' and v
== 'underline':
4008 unclosed_elements
.append('u')
4010 self
._out
+= '<font' + font
+ '>'
4011 unclosed_elements
.append('font')
4013 if self
._applied
_styles
:
4014 applied_style
.update(self
._applied
_styles
[-1])
4015 applied_style
.update(style
)
4016 self
._applied
_styles
.append(applied_style
)
4017 self
._unclosed
_elements
.append(unclosed_elements
)
4020 if tag
not in (_x('ttml:br'), 'br'):
4021 unclosed_elements
= self
._unclosed
_elements
.pop()
4022 for element
in reversed(unclosed_elements
):
4023 self
._out
+= '</%s>' % element
4024 if unclosed_elements
and self
._applied
_styles
:
4025 self
._applied
_styles
.pop()
4027 def data(self
, data
):
4031 return self
._out
.strip()
4033 # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
4034 # This will not trigger false positives since only UTF-8 text is being replaced
4035 dfxp_data
= dfxp_data
.replace(b
'encoding=\'UTF-16\'', b
'encoding=\'UTF-8\'')
4037 def parse_node(node
):
4038 target
= TTMLPElementParser()
4039 parser
= xml
.etree
.ElementTree
.XMLParser(target
=target
)
4040 parser
.feed(xml
.etree
.ElementTree
.tostring(node
))
4041 return parser
.close()
4043 for k
, v
in LEGACY_NAMESPACES
:
4045 dfxp_data
= dfxp_data
.replace(ns
, k
)
4047 dfxp
= compat_etree_fromstring(dfxp_data
)
4049 paras
= dfxp
.findall(_x('.//ttml:p')) or dfxp
.findall('.//p')
4052 raise ValueError('Invalid dfxp/TTML subtitle')
4056 for style
in dfxp
.findall(_x('.//ttml:style')):
4057 style_id
= style
.get('id') or style
.get(_x('xml:id'))
4060 parent_style_id
= style
.get('style')
4062 if parent_style_id
not in styles
:
4065 styles
[style_id
] = styles
[parent_style_id
].copy()
4066 for prop
in SUPPORTED_STYLING
:
4067 prop_val
= style
.get(_x('tts:' + prop
))
4069 styles
.setdefault(style_id
, {})[prop
] = prop_val
4075 for p
in ('body', 'div'):
4076 ele
= xpath_element(dfxp
, [_x('.//ttml:' + p
), './/' + p
])
4079 style
= styles
.get(ele
.get('style'))
4082 default_style
.update(style
)
4084 for para
, index
in zip(paras
, itertools
.count(1)):
4085 begin_time
= parse_dfxp_time_expr(para
.attrib
.get('begin'))
4086 end_time
= parse_dfxp_time_expr(para
.attrib
.get('end'))
4087 dur
= parse_dfxp_time_expr(para
.attrib
.get('dur'))
4088 if begin_time
is None:
4093 end_time
= begin_time
+ dur
4094 out
.append('%d\n%s --> %s\n%s\n\n' % (
4096 srt_subtitles_timecode(begin_time
),
4097 srt_subtitles_timecode(end_time
),
def cli_option(params, command_option, param, separator=None):
    """Turn the `param` entry of `params` into command-line arguments.

    Returns an empty list when the value is missing or None. Otherwise returns
    [command_option, str(value)] when no `separator` is given, or a single
    "<command_option><separator><value>" argument when one is.
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Turn a boolean entry of `params` into command-line arguments via cli_option().

    The value must be True, False or None. True/False are mapped to
    `true_value`/`false_value`; a missing or None value is looked up as None
    in that two-entry mapping.
    """
    flag = params.get(param)
    assert flag in (True, False, None)
    value_map = {True: true_value, False: false_value}
    return cli_option(value_map, command_option, flag, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals `expected_value`, else an empty list."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4120 def cli_configuration_args(argdict
, keys
, default
=[], use_compat
=True):
4121 if isinstance(argdict
, (list, tuple)): # for backward compatibility
4128 assert isinstance(argdict
, dict)
4130 assert isinstance(keys
, (list, tuple))
4131 for key_list
in keys
:
4132 arg_list
= list(filter(
4133 lambda x
: x
is not None,
4134 [argdict
.get(key
.lower()) for key
in variadic(key_list
)]))
4136 return [arg
for args
in arg_list
for arg
in args
]
4140 def _configuration_args(main_key
, argdict
, exe
, keys
=None, default
=[], use_compat
=True):
4141 main_key
, exe
= main_key
.lower(), exe
.lower()
4142 root_key
= exe
if main_key
== exe
else f
'{main_key}+{exe}'
4143 keys
= [f
'{root_key}{k}' for k
in (keys
or [''])]
4144 if root_key
in keys
:
4146 keys
.append((main_key
, exe
))
4147 keys
.append('default')
4150 return cli_configuration_args(argdict
, keys
, default
, use_compat
)
4154 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4213 'iw': 'heb', # Replaced by he in 1989 revision
4223 'in': 'ind', # Replaced by id in 1989 revision
4338 'ji': 'yid', # Replaced by yi in 1989 revision
4346 def short2long(cls
, code
):
4347 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4348 return cls
._lang
_map
.get(code
[:2])
4351 def long2short(cls
, code
):
4352 """Convert language code from ISO 639-2/T to ISO 639-1"""
4353 for short_name
, long_name
in cls
._lang
_map
.items():
4354 if long_name
== code
:
4359 # From http://data.okfn.org/data/core/country-list
4361 'AF': 'Afghanistan',
4362 'AX': 'Åland Islands',
4365 'AS': 'American Samoa',
4370 'AG': 'Antigua and Barbuda',
4387 'BO': 'Bolivia, Plurinational State of',
4388 'BQ': 'Bonaire, Sint Eustatius and Saba',
4389 'BA': 'Bosnia and Herzegovina',
4391 'BV': 'Bouvet Island',
4393 'IO': 'British Indian Ocean Territory',
4394 'BN': 'Brunei Darussalam',
4396 'BF': 'Burkina Faso',
4402 'KY': 'Cayman Islands',
4403 'CF': 'Central African Republic',
4407 'CX': 'Christmas Island',
4408 'CC': 'Cocos (Keeling) Islands',
4412 'CD': 'Congo, the Democratic Republic of the',
4413 'CK': 'Cook Islands',
4415 'CI': 'Côte d\'Ivoire',
4420 'CZ': 'Czech Republic',
4424 'DO': 'Dominican Republic',
4427 'SV': 'El Salvador',
4428 'GQ': 'Equatorial Guinea',
4432 'FK': 'Falkland Islands (Malvinas)',
4433 'FO': 'Faroe Islands',
4437 'GF': 'French Guiana',
4438 'PF': 'French Polynesia',
4439 'TF': 'French Southern Territories',
4454 'GW': 'Guinea-Bissau',
4457 'HM': 'Heard Island and McDonald Islands',
4458 'VA': 'Holy See (Vatican City State)',
4465 'IR': 'Iran, Islamic Republic of',
4468 'IM': 'Isle of Man',
4478 'KP': 'Korea, Democratic People\'s Republic of',
4479 'KR': 'Korea, Republic of',
4482 'LA': 'Lao People\'s Democratic Republic',
4488 'LI': 'Liechtenstein',
4492 'MK': 'Macedonia, the Former Yugoslav Republic of',
4499 'MH': 'Marshall Islands',
4505 'FM': 'Micronesia, Federated States of',
4506 'MD': 'Moldova, Republic of',
4517 'NL': 'Netherlands',
4518 'NC': 'New Caledonia',
4519 'NZ': 'New Zealand',
4524 'NF': 'Norfolk Island',
4525 'MP': 'Northern Mariana Islands',
4530 'PS': 'Palestine, State of',
4532 'PG': 'Papua New Guinea',
4535 'PH': 'Philippines',
4539 'PR': 'Puerto Rico',
4543 'RU': 'Russian Federation',
4545 'BL': 'Saint Barthélemy',
4546 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4547 'KN': 'Saint Kitts and Nevis',
4548 'LC': 'Saint Lucia',
4549 'MF': 'Saint Martin (French part)',
4550 'PM': 'Saint Pierre and Miquelon',
4551 'VC': 'Saint Vincent and the Grenadines',
4554 'ST': 'Sao Tome and Principe',
4555 'SA': 'Saudi Arabia',
4559 'SL': 'Sierra Leone',
4561 'SX': 'Sint Maarten (Dutch part)',
4564 'SB': 'Solomon Islands',
4566 'ZA': 'South Africa',
4567 'GS': 'South Georgia and the South Sandwich Islands',
4568 'SS': 'South Sudan',
4573 'SJ': 'Svalbard and Jan Mayen',
4576 'CH': 'Switzerland',
4577 'SY': 'Syrian Arab Republic',
4578 'TW': 'Taiwan, Province of China',
4580 'TZ': 'Tanzania, United Republic of',
4582 'TL': 'Timor-Leste',
4586 'TT': 'Trinidad and Tobago',
4589 'TM': 'Turkmenistan',
4590 'TC': 'Turks and Caicos Islands',
4594 'AE': 'United Arab Emirates',
4595 'GB': 'United Kingdom',
4596 'US': 'United States',
4597 'UM': 'United States Minor Outlying Islands',
4601 'VE': 'Venezuela, Bolivarian Republic of',
4603 'VG': 'Virgin Islands, British',
4604 'VI': 'Virgin Islands, U.S.',
4605 'WF': 'Wallis and Futuna',
4606 'EH': 'Western Sahara',
4610 # Not ISO 3166 codes, but used for IP blocks
4611 'AP': 'Asia/Pacific Region',
4616 def short2full(cls
, code
):
4617 """Convert an ISO 3166-2 country code to the corresponding full name"""
4618 return cls
._country
_map
.get(code
.upper())
4622 # Major IPv4 address blocks per country
4624 'AD': '46.172.224.0/19',
4625 'AE': '94.200.0.0/13',
4626 'AF': '149.54.0.0/17',
4627 'AG': '209.59.64.0/18',
4628 'AI': '204.14.248.0/21',
4629 'AL': '46.99.0.0/16',
4630 'AM': '46.70.0.0/15',
4631 'AO': '105.168.0.0/13',
4632 'AP': '182.50.184.0/21',
4633 'AQ': '23.154.160.0/24',
4634 'AR': '181.0.0.0/12',
4635 'AS': '202.70.112.0/20',
4636 'AT': '77.116.0.0/14',
4637 'AU': '1.128.0.0/11',
4638 'AW': '181.41.0.0/18',
4639 'AX': '185.217.4.0/22',
4640 'AZ': '5.197.0.0/16',
4641 'BA': '31.176.128.0/17',
4642 'BB': '65.48.128.0/17',
4643 'BD': '114.130.0.0/16',
4645 'BF': '102.178.0.0/15',
4646 'BG': '95.42.0.0/15',
4647 'BH': '37.131.0.0/17',
4648 'BI': '154.117.192.0/18',
4649 'BJ': '137.255.0.0/16',
4650 'BL': '185.212.72.0/23',
4651 'BM': '196.12.64.0/18',
4652 'BN': '156.31.0.0/16',
4653 'BO': '161.56.0.0/16',
4654 'BQ': '161.0.80.0/20',
4655 'BR': '191.128.0.0/12',
4656 'BS': '24.51.64.0/18',
4657 'BT': '119.2.96.0/19',
4658 'BW': '168.167.0.0/16',
4659 'BY': '178.120.0.0/13',
4660 'BZ': '179.42.192.0/18',
4661 'CA': '99.224.0.0/11',
4662 'CD': '41.243.0.0/16',
4663 'CF': '197.242.176.0/21',
4664 'CG': '160.113.0.0/16',
4665 'CH': '85.0.0.0/13',
4666 'CI': '102.136.0.0/14',
4667 'CK': '202.65.32.0/19',
4668 'CL': '152.172.0.0/14',
4669 'CM': '102.244.0.0/14',
4670 'CN': '36.128.0.0/10',
4671 'CO': '181.240.0.0/12',
4672 'CR': '201.192.0.0/12',
4673 'CU': '152.206.0.0/15',
4674 'CV': '165.90.96.0/19',
4675 'CW': '190.88.128.0/17',
4676 'CY': '31.153.0.0/16',
4677 'CZ': '88.100.0.0/14',
4679 'DJ': '197.241.0.0/17',
4680 'DK': '87.48.0.0/12',
4681 'DM': '192.243.48.0/20',
4682 'DO': '152.166.0.0/15',
4683 'DZ': '41.96.0.0/12',
4684 'EC': '186.68.0.0/15',
4685 'EE': '90.190.0.0/15',
4686 'EG': '156.160.0.0/11',
4687 'ER': '196.200.96.0/20',
4688 'ES': '88.0.0.0/11',
4689 'ET': '196.188.0.0/14',
4690 'EU': '2.16.0.0/13',
4691 'FI': '91.152.0.0/13',
4692 'FJ': '144.120.0.0/16',
4693 'FK': '80.73.208.0/21',
4694 'FM': '119.252.112.0/20',
4695 'FO': '88.85.32.0/19',
4697 'GA': '41.158.0.0/15',
4699 'GD': '74.122.88.0/21',
4700 'GE': '31.146.0.0/16',
4701 'GF': '161.22.64.0/18',
4702 'GG': '62.68.160.0/19',
4703 'GH': '154.160.0.0/12',
4704 'GI': '95.164.0.0/16',
4705 'GL': '88.83.0.0/19',
4706 'GM': '160.182.0.0/15',
4707 'GN': '197.149.192.0/18',
4708 'GP': '104.250.0.0/19',
4709 'GQ': '105.235.224.0/20',
4710 'GR': '94.64.0.0/13',
4711 'GT': '168.234.0.0/16',
4712 'GU': '168.123.0.0/16',
4713 'GW': '197.214.80.0/20',
4714 'GY': '181.41.64.0/18',
4715 'HK': '113.252.0.0/14',
4716 'HN': '181.210.0.0/16',
4717 'HR': '93.136.0.0/13',
4718 'HT': '148.102.128.0/17',
4719 'HU': '84.0.0.0/14',
4720 'ID': '39.192.0.0/10',
4721 'IE': '87.32.0.0/12',
4722 'IL': '79.176.0.0/13',
4723 'IM': '5.62.80.0/20',
4724 'IN': '117.192.0.0/10',
4725 'IO': '203.83.48.0/21',
4726 'IQ': '37.236.0.0/14',
4727 'IR': '2.176.0.0/12',
4728 'IS': '82.221.0.0/16',
4729 'IT': '79.0.0.0/10',
4730 'JE': '87.244.64.0/18',
4731 'JM': '72.27.0.0/17',
4732 'JO': '176.29.0.0/16',
4733 'JP': '133.0.0.0/8',
4734 'KE': '105.48.0.0/12',
4735 'KG': '158.181.128.0/17',
4736 'KH': '36.37.128.0/17',
4737 'KI': '103.25.140.0/22',
4738 'KM': '197.255.224.0/20',
4739 'KN': '198.167.192.0/19',
4740 'KP': '175.45.176.0/22',
4741 'KR': '175.192.0.0/10',
4742 'KW': '37.36.0.0/14',
4743 'KY': '64.96.0.0/15',
4744 'KZ': '2.72.0.0/13',
4745 'LA': '115.84.64.0/18',
4746 'LB': '178.135.0.0/16',
4747 'LC': '24.92.144.0/20',
4748 'LI': '82.117.0.0/19',
4749 'LK': '112.134.0.0/15',
4750 'LR': '102.183.0.0/16',
4751 'LS': '129.232.0.0/17',
4752 'LT': '78.56.0.0/13',
4753 'LU': '188.42.0.0/16',
4754 'LV': '46.109.0.0/16',
4755 'LY': '41.252.0.0/14',
4756 'MA': '105.128.0.0/11',
4757 'MC': '88.209.64.0/18',
4758 'MD': '37.246.0.0/16',
4759 'ME': '178.175.0.0/17',
4760 'MF': '74.112.232.0/21',
4761 'MG': '154.126.0.0/17',
4762 'MH': '117.103.88.0/21',
4763 'MK': '77.28.0.0/15',
4764 'ML': '154.118.128.0/18',
4765 'MM': '37.111.0.0/17',
4766 'MN': '49.0.128.0/17',
4767 'MO': '60.246.0.0/16',
4768 'MP': '202.88.64.0/20',
4769 'MQ': '109.203.224.0/19',
4770 'MR': '41.188.64.0/18',
4771 'MS': '208.90.112.0/22',
4772 'MT': '46.11.0.0/16',
4773 'MU': '105.16.0.0/12',
4774 'MV': '27.114.128.0/18',
4775 'MW': '102.70.0.0/15',
4776 'MX': '187.192.0.0/11',
4777 'MY': '175.136.0.0/13',
4778 'MZ': '197.218.0.0/15',
4779 'NA': '41.182.0.0/16',
4780 'NC': '101.101.0.0/18',
4781 'NE': '197.214.0.0/18',
4782 'NF': '203.17.240.0/22',
4783 'NG': '105.112.0.0/12',
4784 'NI': '186.76.0.0/15',
4785 'NL': '145.96.0.0/11',
4786 'NO': '84.208.0.0/13',
4787 'NP': '36.252.0.0/15',
4788 'NR': '203.98.224.0/19',
4789 'NU': '49.156.48.0/22',
4790 'NZ': '49.224.0.0/14',
4791 'OM': '5.36.0.0/15',
4792 'PA': '186.72.0.0/15',
4793 'PE': '186.160.0.0/14',
4794 'PF': '123.50.64.0/18',
4795 'PG': '124.240.192.0/19',
4796 'PH': '49.144.0.0/13',
4797 'PK': '39.32.0.0/11',
4798 'PL': '83.0.0.0/11',
4799 'PM': '70.36.0.0/20',
4800 'PR': '66.50.0.0/16',
4801 'PS': '188.161.0.0/16',
4802 'PT': '85.240.0.0/13',
4803 'PW': '202.124.224.0/20',
4804 'PY': '181.120.0.0/14',
4805 'QA': '37.210.0.0/15',
4806 'RE': '102.35.0.0/16',
4807 'RO': '79.112.0.0/13',
4808 'RS': '93.86.0.0/15',
4809 'RU': '5.136.0.0/13',
4810 'RW': '41.186.0.0/16',
4811 'SA': '188.48.0.0/13',
4812 'SB': '202.1.160.0/19',
4813 'SC': '154.192.0.0/11',
4814 'SD': '102.120.0.0/13',
4815 'SE': '78.64.0.0/12',
4816 'SG': '8.128.0.0/10',
4817 'SI': '188.196.0.0/14',
4818 'SK': '78.98.0.0/15',
4819 'SL': '102.143.0.0/17',
4820 'SM': '89.186.32.0/19',
4821 'SN': '41.82.0.0/15',
4822 'SO': '154.115.192.0/18',
4823 'SR': '186.179.128.0/17',
4824 'SS': '105.235.208.0/21',
4825 'ST': '197.159.160.0/19',
4826 'SV': '168.243.0.0/16',
4827 'SX': '190.102.0.0/20',
4829 'SZ': '41.84.224.0/19',
4830 'TC': '65.255.48.0/20',
4831 'TD': '154.68.128.0/19',
4832 'TG': '196.168.0.0/14',
4833 'TH': '171.96.0.0/13',
4834 'TJ': '85.9.128.0/18',
4835 'TK': '27.96.24.0/21',
4836 'TL': '180.189.160.0/20',
4837 'TM': '95.85.96.0/19',
4838 'TN': '197.0.0.0/11',
4839 'TO': '175.176.144.0/21',
4840 'TR': '78.160.0.0/11',
4841 'TT': '186.44.0.0/15',
4842 'TV': '202.2.96.0/19',
4843 'TW': '120.96.0.0/11',
4844 'TZ': '156.156.0.0/14',
4845 'UA': '37.52.0.0/14',
4846 'UG': '102.80.0.0/13',
4848 'UY': '167.56.0.0/13',
4849 'UZ': '84.54.64.0/18',
4850 'VA': '212.77.0.0/19',
4851 'VC': '207.191.240.0/21',
4852 'VE': '186.88.0.0/13',
4853 'VG': '66.81.192.0/20',
4854 'VI': '146.226.0.0/16',
4855 'VN': '14.160.0.0/11',
4856 'VU': '202.80.32.0/20',
4857 'WF': '117.20.32.0/21',
4858 'WS': '202.4.32.0/19',
4859 'YE': '134.35.0.0/16',
4860 'YT': '41.242.116.0/22',
4861 'ZA': '41.0.0.0/11',
4862 'ZM': '102.144.0.0/13',
4863 'ZW': '102.177.192.0/18',
4867 def random_ipv4(cls
, code_or_block
):
4868 if len(code_or_block
) == 2:
4869 block
= cls
._country
_ip
_map
.get(code_or_block
.upper())
4873 block
= code_or_block
4874 addr
, preflen
= block
.split('/')
4875 addr_min
= struct
.unpack('!L', socket
.inet_aton(addr
))[0]
4876 addr_max
= addr_min |
(0xffffffff >> int(preflen
))
4877 return str(socket
.inet_ntoa(
4878 struct
.pack('!L', random
.randint(addr_min
, addr_max
))))
4881 class PerRequestProxyHandler(urllib
.request
.ProxyHandler
):
4882 def __init__(self
, proxies
=None):
4883 # Set default handlers
4884 for type in ('http', 'https'):
4885 setattr(self
, '%s_open' % type,
4886 lambda r
, proxy
='__noproxy__', type=type, meth
=self
.proxy_open
:
4887 meth(r
, proxy
, type))
4888 urllib
.request
.ProxyHandler
.__init
__(self
, proxies
)
4890 def proxy_open(self
, req
, proxy
, type):
4891 req_proxy
= req
.headers
.get('Ytdl-request-proxy')
4892 if req_proxy
is not None:
4894 del req
.headers
['Ytdl-request-proxy']
4896 if proxy
== '__noproxy__':
4897 return None # No Proxy
4898 if urllib
.parse
.urlparse(proxy
).scheme
.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4899 req
.add_header('Ytdl-socks-proxy', proxy
)
4900 # yt-dlp's http/https handlers do wrapping the socket with socks
4902 return urllib
.request
.ProxyHandler
.proxy_open(
4903 self
, req
, proxy
, type)
4906 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4907 # released into Public Domain
4908 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4910 def long_to_bytes(n
, blocksize
=0):
4911 """long_to_bytes(n:long, blocksize:int) : string
4912 Convert a long integer to a byte string.
4914 If optional blocksize is given and greater than zero, pad the front of the
4915 byte string with binary zeros so that the length is a multiple of
4918 # after much testing, this algorithm was deemed to be the fastest
4922 s
= struct
.pack('>I', n
& 0xffffffff) + s
4924 # strip off leading zeros
4925 for i
in range(len(s
)):
4926 if s
[i
] != b
'\000'[0]:
4929 # only happens when n == 0
4933 # add back some pad bytes. this could be done more efficiently w.r.t. the
4934 # de-padding being done above, but sigh...
4935 if blocksize
> 0 and len(s
) % blocksize
:
4936 s
= (blocksize
- len(s
) % blocksize
) * b
'\000' + s
4940 def bytes_to_long(s
):
4941 """bytes_to_long(string) : long
4942 Convert a byte string to a long integer.
4944 This is (essentially) the inverse of long_to_bytes().
4949 extra
= (4 - length
% 4)
4950 s
= b
'\000' * extra
+ s
4951 length
= length
+ extra
4952 for i
in range(0, length
, 4):
4953 acc
= (acc
<< 32) + struct
.unpack('>I', s
[i
:i
+ 4])[0]
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The bytes of `data` are reversed and read as a single hexadecimal
    integer before the modular exponentiation is applied.

    @param data      data to encrypt, bytes-like object
    @param exponent  parameter e of the RSA algorithm, integer
    @param modulus   parameter N of the RSA algorithm, integer
    @returns         hex string of the encrypted data (lowercase, no prefix)

    Limitation: supports one block encryption only
    """
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return '%x' % ciphertext
def pkcs1pad(data, length):
    """
    Pad input data with the PKCS#1 v1.5 encryption scheme

    The result is laid out as [0, 2, <padding>, 0, *data]. The padding octets
    are random and never zero, so the first zero octet after the leading
    [0, 2] always marks where the data starts.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data (a new list of exactly `length` items)
    @raises ValueError         if data does not leave room for at least 8 padding octets
    """
    data = list(data)
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 section 7.2.1 requires *nonzero* padding octets; a zero here
    # would be mistaken for the separator and truncate the message on decoding.
    # NOTE: `random` is not a cryptographically secure source of randomness
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2, *pseudo_random, 0, *data]
4989 def _base_n_table(n
, table
):
4990 if not table
and not n
:
4991 raise ValueError('Either table or n must be specified')
4992 table
= (table
or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n
]
4994 if n
and n
!= len(table
):
4995 raise ValueError(f
'base {n} exceeds table length {len(table)}')
4999 def encode_base_n(num
, n
=None, table
=None):
5000 """Convert given int to a base-n string"""
5001 table
= _base_n_table(n
, table
)
5005 result
, base
= '', len(table
)
5007 result
= table
[num
% base
] + result
5012 def decode_base_n(string
, n
=None, table
=None):
5013 """Convert given base-n string to int"""
5014 table
= {char: index for index, char in enumerate(_base_n_table(n, table))}
5015 result
, base
= 0, len(table
)
5017 result
= result
* base
+ table
[char
]
5021 def decode_packed_codes(code
):
5022 mobj
= re
.search(PACKED_CODES_RE
, code
)
5023 obfuscated_code
, base
, count
, symbols
= mobj
.groups()
5026 symbols
= symbols
.split('|')
5031 base_n_count
= encode_base_n(count
, base
)
5032 symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
5035 r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)],
5039 def caesar(s
, alphabet
, shift
):
5044 alphabet
[(alphabet
.index(c
) + shift
) % l
] if c
in alphabet
else c
5049 return caesar(s
, r
'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5052 def parse_m3u8_attributes(attrib
):
5054 for (key
, val
) in re
.findall(r
'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib
):
5055 if val
.startswith('"'):
def urshift(val, n):
    """
    Shift `val` right by `n` bits, treating negative values as unsigned 32-bit

    Negative inputs are offset by 2**32 before shifting; non-negative inputs
    are shifted as they are.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
5065 def write_xattr(path
, key
, value
):
5066 # Windows: Write xattrs to NTFS Alternate Data Streams:
5067 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5068 if compat_os_name
== 'nt':
5069 assert ':' not in key
5070 assert os
.path
.exists(path
)
5073 with open(f
'{path}:{key}', 'wb') as f
:
5075 except OSError as e
:
5076 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5079 # UNIX Method 1. Use xattrs/pyxattrs modules
5082 if getattr(xattr
, '_yt_dlp__identifier', None) == 'pyxattr':
5083 # Unicode arguments are not supported in pyxattr until version 0.5.0
5084 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5085 if version_tuple(xattr
.__version
__) >= (0, 5, 0):
5086 setxattr
= xattr
.set
5088 setxattr
= xattr
.setxattr
5092 setxattr(path
, key
, value
)
5093 except OSError as e
:
5094 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5097 # UNIX Method 2. Use setfattr/xattr executables
5098 exe
= ('setfattr' if check_executable('setfattr', ['--version'])
5099 else 'xattr' if check_executable('xattr', ['-h']) else None)
5101 raise XAttrUnavailableError(
5102 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5103 + ('"xattr" binary' if sys
.platform
!= 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5105 value
= value
.decode()
5107 _
, stderr
, returncode
= Popen
.run(
5108 [exe
, '-w', key
, value
, path
] if exe
== 'xattr' else [exe
, '-n', key
, '-v', value
, path
],
5109 text
=True, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
5110 except OSError as e
:
5111 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5113 raise XAttrMetadataError(returncode
, stderr
)
5116 def random_birthday(year_field
, month_field
, day_field
):
5117 start_date
= datetime
.date(1950, 1, 1)
5118 end_date
= datetime
.date(1995, 12, 31)
5119 offset
= random
.randint(0, (end_date
- start_date
).days
)
5120 random_date
= start_date
+ datetime
.timedelta(offset
)
5122 year_field
: str(random_date
.year
),
5123 month_field
: str(random_date
.month
),
5124 day_field
: str(random_date
.day
),
5128 def find_available_port(interface
=''):
5130 with socket
.socket() as sock
:
5131 sock
.bind((interface
, 0))
5132 return sock
.getsockname()[1]
5137 # Templates for internet shortcut files, which are plain text files.
5138 DOT_URL_LINK_TEMPLATE
= '''\
5143 DOT_WEBLOC_LINK_TEMPLATE
= '''\
5144 <?xml version="1.0" encoding="UTF-8"?>
5145 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5146 <plist version="1.0">
5149 \t<string>%(url)s</string>
5154 DOT_DESKTOP_LINK_TEMPLATE
= '''\
5164 'url': DOT_URL_LINK_TEMPLATE
,
5165 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
5166 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
5170 def iri_to_uri(iri
):
5172 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5174 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5177 iri_parts
= urllib
.parse
.urlparse(iri
)
5179 if '[' in iri_parts
.netloc
:
5180 raise ValueError('IPv6 URIs are not, yet, supported.')
5181 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5183 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5186 if iri_parts
.username
:
5187 net_location
+= urllib
.parse
.quote(iri_parts
.username
, safe
=r
"!$%&'()*+,~")
5188 if iri_parts
.password
is not None:
5189 net_location
+= ':' + urllib
.parse
.quote(iri_parts
.password
, safe
=r
"!$%&'()*+,~")
5192 net_location
+= iri_parts
.hostname
.encode('idna').decode() # Punycode for Unicode hostnames.
5193 # The 'idna' encoding produces ASCII text.
5194 if iri_parts
.port
is not None and iri_parts
.port
!= 80:
5195 net_location
+= ':' + str(iri_parts
.port
)
5197 return urllib
.parse
.urlunparse(
5201 urllib
.parse
.quote_plus(iri_parts
.path
, safe
=r
"!$%&'()*+,/:;=@|~"),
5203 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5204 urllib
.parse
.quote_plus(iri_parts
.params
, safe
=r
"!$%&'()*+,/:;=@|~"),
5206 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5207 urllib
.parse
.quote_plus(iri_parts
.query
, safe
=r
"!$%&'()*+,/:;=?@{|}~"),
5209 urllib
.parse
.quote_plus(iri_parts
.fragment
, safe
=r
"!#$%&'()*+,/:;=?@{|}~")))
5211 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5214 def to_high_limit_path(path
):
5215 if sys
.platform
in ['win32', 'cygwin']:
5216 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5217 return '\\\\?\\' + os
.path
.abspath(path
)
5222 def format_field(obj
, field
=None, template
='%s', ignore
=NO_DEFAULT
, default
='', func
=IDENTITY
):
5223 val
= traversal
.traverse_obj(obj
, *variadic(field
))
5224 if not val
if ignore
is NO_DEFAULT
else val
in variadic(ignore
):
5226 return template
% func(val
)
5229 def clean_podcast_url(url
):
5230 return re
.sub(r
'''(?x)
5234 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5237 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5240 cn\.co| # https://podcorn.com/analytics-prefix/
5241 st\.fm # https://podsights.com/docs/
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Generate a random version 4 UUID string

    Every `x` of the template is replaced by a random hex digit. The `y`
    position holds the variant bits and is restricted to 8, 9, a or b, as
    RFC 4122 requires (previously any hex digit could be produced there).

    @returns  lowercase string of the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
    """
    def _random_digit(mobj):
        # 'y' carries the two variant bits (0b10xx); 'x' is unconstrained
        return random.choice('89ab' if mobj.group(0) == 'y' else _HEX_TABLE)

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """
    Create the directory that contains `path`, including missing ancestors

    @param path       path of a file whose parent directory should exist
    @param to_screen  optional callable; receives a single message when the
                      directory cannot be created
    @returns          True if the parent directory exists afterwards (or `path`
                      has no directory component), False on failure
    """
    try:
        dn = os.path.dirname(path)
        # A bare filename has no directory part; os.makedirs('') would fail
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # callable() already yields a bool; the former `is not None` check was
        # always true and made `to_screen=None` raise a TypeError here
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
def get_executable_path():
    """
    Return the absolute directory of the path reported by the updater

    The path is the second item returned by
    `_get_variant_and_executable_path()`.
    """
    # Imported at call time rather than at module level
    from ..update import _get_variant_and_executable_path

    executable = _get_variant_and_executable_path()[1]
    absolute = os.path.abspath(executable)
    return os.path.dirname(absolute)
5271 def get_user_config_dirs(package_name
):
5272 # .config (e.g. ~/.config/package_name)
5273 xdg_config_home
= os
.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
5274 yield os
.path
.join(xdg_config_home
, package_name
)
5276 # appdata (%APPDATA%/package_name)
5277 appdata_dir
= os
.getenv('appdata')
5279 yield os
.path
.join(appdata_dir
, package_name
)
5281 # home (~/.package_name)
5282 yield os
.path
.join(compat_expanduser('~'), f
'.{package_name}')
def get_system_config_dirs(package_name):
    """Yield the system-wide configuration directory of `package_name`"""
    # /etc/package_name
    etc_dir = os.path.join('/etc', package_name)
    yield etc_dir
def time_seconds(**kwargs):
    """
    Returns the time in seconds since the epoch (1970-01-01T00:00:00Z),
    shifted by an optional offset

    @param kwargs  passed to datetime.timedelta (e.g. hours=9); the resulting
                   duration is added to the current time
    """
    now = time.time()
    offset = datetime.timedelta(**kwargs)
    return now + offset.total_seconds()
5297 # create a JSON Web Signature (jws) with HS256 algorithm
5298 # the resulting format is in JWS Compact Serialization
5299 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5300 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5301 def jwt_encode_hs256(payload_data
, key
, headers
={}):
5307 header_data
.update(headers
)
5308 header_b64
= base64
.b64encode(json
.dumps(header_data
).encode())
5309 payload_b64
= base64
.b64encode(json
.dumps(payload_data
).encode())
5310 h
= hmac
.new(key
.encode(), header_b64
+ b
'.' + payload_b64
, hashlib
.sha256
)
5311 signature_b64
= base64
.b64encode(h
.digest())
5312 token
= header_b64
+ b
'.' + payload_b64
+ b
'.' + signature_b64
5316 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5317 def jwt_decode_hs256(jwt
):
5318 header_b64
, payload_b64
, signature_b64
= jwt
.split('.')
5319 # add trailing ='s that may have been stripped, superfluous ='s are ignored
5320 payload_data
= json
.loads(base64
.urlsafe_b64decode(f
'{payload_b64}==='))
5324 WINDOWS_VT_MODE
= False if compat_os_name
== 'nt' else None
5328 def supports_terminal_sequences(stream
):
5329 if compat_os_name
== 'nt':
5330 if not WINDOWS_VT_MODE
:
5332 elif not os
.getenv('TERM'):
5335 return stream
.isatty()
5336 except BaseException
:
5340 def windows_enable_vt_mode():
5341 """Ref: https://bugs.python.org/issue30075 """
5342 if get_windows_version() < (10, 0, 10586):
5346 import ctypes
.wintypes
5349 ENABLE_VIRTUAL_TERMINAL_PROCESSING
= 0x0004
5351 dll
= ctypes
.WinDLL('kernel32', use_last_error
=False)
5352 handle
= os
.open('CONOUT$', os
.O_RDWR
)
5354 h_out
= ctypes
.wintypes
.HANDLE(msvcrt
.get_osfhandle(handle
))
5355 dw_original_mode
= ctypes
.wintypes
.DWORD()
5356 success
= dll
.GetConsoleMode(h_out
, ctypes
.byref(dw_original_mode
))
5358 raise Exception('GetConsoleMode failed')
5360 success
= dll
.SetConsoleMode(h_out
, ctypes
.wintypes
.DWORD(
5361 dw_original_mode
.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING
))
5363 raise Exception('SetConsoleMode failed')
5367 global WINDOWS_VT_MODE
5368 WINDOWS_VT_MODE
= True
5369 supports_terminal_sequences
.cache_clear()
# ESC "[" followed by one or more characters other than "m", then "m"
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with every match of `_terminal_sequences_re` removed"""
    return re.sub(_terminal_sequences_re, '', string)
def number_of_digits(number):
    """
    Return the length of `number` when formatted with '%d'

    A leading minus sign is counted; any fractional part is dropped by '%d'.
    """
    rendered = '%d' % number
    return len(rendered)
def join_nonempty(*values, delim='-', from_dict=None):
    """
    Join the truthy items of `values` with `delim`

    @param values     items to join; falsy ones (None, '', 0, ...) are dropped
                      and the rest are converted with str()
    @param delim      separator placed between the kept items
    @param from_dict  if not None, each value is instead used as a path that is
                      looked up in this object with traversal.traverse_obj
    """
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(path)) for path in values)
    return delim.join(str(item) for item in values if item)
5389 def scale_thumbnails_to_max_format_width(formats
, thumbnails
, url_width_re
):
5391 Find the largest format dimensions in terms of video width and, for each thumbnail:
5392 * Modify the URL: Match the width with the provided regex and replace with the former width
5395 This function is useful with video services that scale the provided thumbnails on demand
5397 _keys
= ('width', 'height')
5398 max_dimensions
= max(
5399 (tuple(format
.get(k
) or 0 for k
in _keys
) for format
in formats
),
5401 if not max_dimensions
[0]:
5405 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}
,
5406 dict(zip(_keys
, max_dimensions
)), thumbnail
)
5407 for thumbnail
in thumbnails
5411 def parse_http_range(range):
5412 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5414 return None, None, None
5415 crg
= re
.search(r
'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5417 return None, None, None
5418 return int(crg
.group(1)), int_or_none(crg
.group(2)), int_or_none(crg
.group(3))
5421 def read_stdin(what
):
5422 eof
= 'Ctrl+Z' if compat_os_name
== 'nt' else 'Ctrl+D'
5423 write_string(f
'Reading {what} from STDIN - EOF ({eof}) to end:\n')
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @param data  bytes from the beginning of the file
    @returns (encoding, bytes to skip); encoding is None when nothing matched
    """

    # BOM marks are given priority over declarations
    bom_match = next(
        ((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if bom_match is not None:
        return bom_match

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    stripped = data.replace(b'\0', b'')
    # re.match only tries the start of the data, so the "# coding: ..."
    # declaration has to be the very first line
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if not declaration:
        return None, 0
    return declaration.group(1).decode(), 0
5449 __initialized
= False
def __init__(self, parser, label=None):
    """
    @param parser  option parser used to parse the collected arguments
    @param label   optional label stored on this config
    """
    self.parser = parser
    self.label = label
    # Locations that were already loaded, and the nested configs appended so far
    self._loaded_paths = set()
    self.configs = []
def init(self, args=None, filename=None):
    """
    Store this config's own arguments and filename, then load the configs

    Must not be called on an already initialized config (checked by assert).

    @returns  whatever load_configs() returns
    """
    assert not self.__initialized
    self.own_args = args
    self.filename = filename
    return self.load_configs()
5460 def load_configs(self
):
5463 location
= os
.path
.realpath(self
.filename
)
5464 directory
= os
.path
.dirname(location
)
5465 if location
in self
._loaded
_paths
:
5467 self
._loaded
_paths
.add(location
)
5469 self
.__initialized
= True
5470 opts
, _
= self
.parser
.parse_known_args(self
.own_args
)
5471 self
.parsed_args
= self
.own_args
5472 for location
in opts
.config_locations
or []:
5474 if location
in self
._loaded
_paths
:
5476 self
._loaded
_paths
.add(location
)
5477 self
.append_config(shlex
.split(read_stdin('options'), comments
=True), label
='stdin')
5479 location
= os
.path
.join(directory
, expand_path(location
))
5480 if os
.path
.isdir(location
):
5481 location
= os
.path
.join(location
, 'yt-dlp.conf')
5482 if not os
.path
.exists(location
):
5483 self
.parser
.error(f
'config location {location} does not exist')
5484 self
.append_config(self
.read_file(location
), location
)
5488 label
= join_nonempty(
5489 self
.label
, 'config', f
'"{self.filename}"' if self
.filename
else '',
5491 return join_nonempty(
5492 self
.own_args
is not None and f
'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5493 *(f
'\n{c}'.replace('\n', '\n| ')[1:] for c
in self
.configs
),
5497 def read_file(filename
, default
=[]):
5499 optionf
= open(filename
, 'rb')
5501 return default
# silently skip if file is not present
5503 enc
, skip
= determine_file_encoding(optionf
.read(512))
5504 optionf
.seek(skip
, io
.SEEK_SET
)
5506 enc
= None # silently skip read errors
5508 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5509 contents
= optionf
.read().decode(enc
or preferredencoding())
5510 res
= shlex
.split(contents
, comments
=True)
5511 except Exception as err
:
5512 raise ValueError(f
'Unable to parse "{filename}": {err}')
5518 def hide_login_info(opts
):
5519 PRIVATE_OPTS
= {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5520 eqre
= re
.compile('^(?P<key>' + ('|'.join(re
.escape(po
) for po
in PRIVATE_OPTS
)) + ')=.+$')
5525 return m
.group('key') + '=PRIVATE'
5529 opts
= list(map(_scrub_eq
, opts
))
5530 for idx
, opt
in enumerate(opts
):
5531 if opt
in PRIVATE_OPTS
and idx
+ 1 < len(opts
):
5532 opts
[idx
+ 1] = 'PRIVATE'
def append_config(self, *args, label=None):
    """
    Create a nested config from `args` and keep it if its init() is truthy

    @param args   forwarded to the nested config's init()
    @param label  label given to the nested config
    """
    child = type(self)(self.parser, label)
    # Share one set of loaded locations between this config and the nested one
    child._loaded_paths = self._loaded_paths
    if not child.init(*args):
        return
    self.configs.append(child)
5543 for config
in reversed(self
.configs
):
5544 yield from config
.all_args
5545 yield from self
.parsed_args
or []
def parse_known_args(self, **kwargs):
    """Run the parser's parse_known_args on all_args, forwarding `kwargs`"""
    parse = self.parser.parse_known_args
    return parse(self.all_args, **kwargs)
def parse_args(self):
    """Run the parser's parse_args on all_args and return its result"""
    parse = self.parser.parse_args
    return parse(self.all_args)
5554 class WebSocketsWrapper
:
5555 """Wraps websockets module to use in non-async scopes"""
5558 def __init__(self
, url
, headers
=None, connect
=True):
5559 self
.loop
= asyncio
.new_event_loop()
5560 # XXX: "loop" is deprecated
5561 self
.conn
= websockets
.connect(
5562 url
, extra_headers
=headers
, ping_interval
=None,
5563 close_timeout
=float('inf'), loop
=self
.loop
, ping_timeout
=float('inf'))
5566 atexit
.register(self
.__exit
__, None, None, None)
5568 def __enter__(self
):
5570 self
.pool
= self
.run_with_loop(self
.conn
.__aenter
__(), self
.loop
)
5573 def send(self
, *args
):
5574 self
.run_with_loop(self
.pool
.send(*args
), self
.loop
)
5576 def recv(self
, *args
):
5577 return self
.run_with_loop(self
.pool
.recv(*args
), self
.loop
)
5579 def __exit__(self
, type, value
, traceback
):
5581 return self
.run_with_loop(self
.conn
.__aexit
__(type, value
, traceback
), self
.loop
)
5584 self
._cancel
_all
_tasks
(self
.loop
)
5586 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
# For contributors: if another library that uses asyncio needs to be run from non-async code, move these functions out of this class
5589 def run_with_loop(main
, loop
):
5590 if not asyncio
.iscoroutine(main
):
5591 raise ValueError(f
'a coroutine was expected, got {main!r}')
5594 return loop
.run_until_complete(main
)
5596 loop
.run_until_complete(loop
.shutdown_asyncgens())
5597 if hasattr(loop
, 'shutdown_default_executor'):
5598 loop
.run_until_complete(loop
.shutdown_default_executor())
5601 def _cancel_all_tasks(loop
):
5602 to_cancel
= asyncio
.all_tasks(loop
)
5607 for task
in to_cancel
:
5610 # XXX: "loop" is removed in python 3.10+
5611 loop
.run_until_complete(
5612 asyncio
.gather(*to_cancel
, loop
=loop
, return_exceptions
=True))
5614 for task
in to_cancel
:
5615 if task
.cancelled():
5617 if task
.exception() is not None:
5618 loop
.call_exception_handler({
5619 'message': 'unhandled exception during asyncio.run() shutdown',
5620 'exception': task
.exception(),
def merge_headers(*dicts):
    """
    Merge dicts of http headers case insensitively, prioritizing the latter ones

    Header names are normalized with str.title(), so names that differ only
    in case collapse into one entry holding the last value seen.
    """
    merged = {}
    for headers in dicts:
        for name, value in dict.items(headers):
            merged[name.title()] = value
    return merged
5630 def cached_method(f
):
5631 """Cache a method"""
5632 signature
= inspect
.signature(f
)
5635 def wrapper(self
, *args
, **kwargs
):
5636 bound_args
= signature
.bind(self
, *args
, **kwargs
)
5637 bound_args
.apply_defaults()
5638 key
= tuple(bound_args
.arguments
.values())[1:]
5640 cache
= vars(self
).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {}
)
5641 if key
not in cache
:
5642 cache
[key
] = f(self
, *args
, **kwargs
)
5647 class classproperty
:
5648 """property access for class methods with optional caching"""
5649 def __new__(cls
, func
=None, *args
, **kwargs
):
5651 return functools
.partial(cls
, *args
, **kwargs
)
5652 return super().__new
__(cls
)
5654 def __init__(self
, func
, *, cache
=False):
5655 functools
.update_wrapper(self
, func
)
5657 self
._cache
= {} if cache
else None
5659 def __get__(self
, _
, cls
):
5660 if self
._cache
is None:
5661 return self
.func(cls
)
5662 elif cls
not in self
._cache
:
5663 self
._cache
[cls
] = self
.func(cls
)
5664 return self
._cache
[cls
]
5667 class function_with_repr
:
5668 def __init__(self
, func
, repr_
=None):
5669 functools
.update_wrapper(self
, func
)
5670 self
.func
, self
.__repr
= func
, repr_
5672 def __call__(self
, *args
, **kwargs
):
5673 return self
.func(*args
, **kwargs
)
5678 return f
'{self.func.__module__}.{self.func.__qualname__}'
5681 class Namespace(types
.SimpleNamespace
):
5682 """Immutable namespace"""
5685 return iter(self
.__dict
__.values())
5689 return self
.__dict
__.items()
5692 MEDIA_EXTENSIONS
= Namespace(
5693 common_video
=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5694 video
=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5695 common_audio
=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5696 audio
=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
5697 thumbnails
=('jpg', 'png', 'webp'),
5698 storyboards
=('mhtml', ),
5699 subtitles
=('srt', 'vtt', 'ass', 'lrc'),
5700 manifests
=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5702 MEDIA_EXTENSIONS
.video
+= MEDIA_EXTENSIONS
.common_video
5703 MEDIA_EXTENSIONS
.audio
+= MEDIA_EXTENSIONS
.common_audio
5705 KNOWN_EXTENSIONS
= (*MEDIA_EXTENSIONS
.video
, *MEDIA_EXTENSIONS
.audio
, *MEDIA_EXTENSIONS
.manifests
)
5710 for retry in RetryManager(...):
5713 except SomeException as err:
5717 attempt
, _error
= 0, None
def __init__(self, _retries, _error_callback, **kwargs):
    """
    @param _retries         number of retries; any falsy value is stored as 0
    @param _error_callback  callable invoked through self.error_callback
    @param kwargs           keyword arguments pre-bound to `_error_callback`
    """
    self.retries = _retries if _retries else 0
    self.error_callback = functools.partial(_error_callback, **kwargs)
def _should_retry(self):
    """Whether an error is recorded and the attempt count is still within `retries`"""
    # NO_DEFAULT in _error means no error is recorded
    if self._error is NO_DEFAULT:
        return False
    return self.attempt <= self.retries
5728 if self
._error
is NO_DEFAULT
:
5733 def error(self
, value
):
5737 while self
._should
_retry
():
5738 self
.error
= NO_DEFAULT
5742 self
.error_callback(self
.error
, self
.attempt
, self
.retries
)
5745 def report_retry(e
, count
, retries
, *, sleep_func
, info
, warn
, error
=None, suffix
=None):
5746 """Utility function for reporting retries"""
5749 return error(f
'{e}. Giving up after {count - 1} retries') if count
> 1 else error(str(e
))
5754 elif isinstance(e
, ExtractorError
):
5755 e
= remove_end(str_or_none(e
.cause
) or e
.orig_msg
, '.')
5756 warn(f
'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5758 delay
= float_or_none(sleep_func(n
=count
- 1)) if callable(sleep_func
) else sleep_func
5760 info(f
'Sleeping {delay:.2f} seconds ...')
def make_archive_id(ie, video_id):
    """
    Build an archive entry of the form "<lowercased ie key> <video id>"

    @param ie        either the key as a string, or an object providing ie_key()
    @param video_id  identifier appended after the key
    """
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
5769 def truncate_string(s
, left
, right
=0):
5770 assert left
> 3 and right
>= 0
5771 if s
is None or len(s
) <= left
+ right
:
5773 return f
'{s[:left-3]}...{s[-right:] if right else ""}'
5776 def orderedSet_from_options(options
, alias_dict
, *, use_regex
=False, start
=None):
5777 assert 'all' in alias_dict
, '"all" alias is required'
5778 requested
= list(start
or [])
5780 discard
= val
.startswith('-')
5784 if val
in alias_dict
:
5785 val
= alias_dict
[val
] if not discard
else [
5786 i
[1:] if i
.startswith('-') else f
'-{i}' for i
in alias_dict
[val
]]
5787 # NB: Do not allow regex in aliases for performance
5788 requested
= orderedSet_from_options(val
, alias_dict
, start
=requested
)
5791 current
= (filter(re
.compile(val
, re
.I
).fullmatch
, alias_dict
['all']) if use_regex
5792 else [val
] if val
in alias_dict
['all'] else None)
5794 raise ValueError(val
)
5797 for item
in current
:
5798 while item
in requested
:
5799 requested
.remove(item
)
5801 requested
.extend(current
)
5803 return orderedSet(requested
)
# Grammar of a single sort token: an optional "+" (captured as `reverse`),
# the field name, then optionally ":" or "~" (captured as `separator`)
# followed by a limit.  A blank token matches with `field` set to None.
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

# Tokens appended after the user- and extractor-supplied ones by
# evaluate_params(), so every field listed here always takes part in sorting.
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
           'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
           'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases
# NOTE(review): not referenced anywhere in this part of the file; presumably an
# alternative (youtube-dl compatible) order selected by a caller - confirm.
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                'height', 'width', 'proto', 'vext', 'abr', 'aext',
                'fps', 'fs_approx', 'source', 'id')

# Per-field configuration, read through _get_field_setting().  Keys used by
# the methods below:
#   type         'field' (implicit default), 'ordered', 'boolean', 'extractor',
#                'multiple', 'combined' or 'alias'
#   field        key(s) of the format dict to read, or the alias target
#   convert      how a limit/value is converted (see _resolve_field_value)
#   order        ranking list for 'ordered' fields; 'order_free' replaces it
#                when the 'prefer_free_formats' param is set
#   regex        if true, entries of 'order' are patterns tried with re.match
#   forced       always placed first in the sort list
#   priority     placed before user fields unless 'format_sort_force' is set
#   visible      if false, the field is omitted from the verbose listing
#   max          'extractor' values at or above this are replaced by -1
#   not_in_list  values that make a 'boolean' field evaluate to -1
#   default      fallback passed to float_or_none() when computing a preference
#   same_limit   a 'combined' field applies one limit to all of its sub-fields
#   function     reduces the values of a 'multiple' field to a single value
#   deprecated   using the alias triggers a deprecation message
# NOTE: the methods below write into this dict in place (missing defaults,
# 'reverse'/'closest'/'limit'/'limit_text', and 'convert'); no per-instance
# copy is made in the visible code.
settings = {
    'vcodec': {'type': 'ordered', 'regex': True,
               'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
    'acodec': {'type': 'ordered', 'regex': True,
               'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
    'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
            'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
    'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
              'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
    'vext': {'type': 'ordered', 'field': 'video_ext',
             'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
             'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
    'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
             'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
             'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
    'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
    'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                   'field': ('vcodec', 'acodec'),
                   'function': lambda it: int(any(v != 'none' for v in it))},
    'ie_pref': {'priority': True, 'type': 'extractor'},
    'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
    'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
    'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
    'quality': {'convert': 'float', 'default': -1},
    'filesize': {'convert': 'bytes'},
    'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
    'id': {'convert': 'string', 'field': 'format_id'},
    'height': {'convert': 'float_none'},
    'width': {'convert': 'float_none'},
    'fps': {'convert': 'float_none'},
    'channels': {'convert': 'float_none', 'field': 'audio_channels'},
    'tbr': {'convert': 'float_none'},
    'vbr': {'convert': 'float_none'},
    'abr': {'convert': 'float_none'},
    'asr': {'convert': 'float_none'},
    'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},

    # Combined/multiple fields: each expands to, or is computed from, several sub-fields
    'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
    'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
    'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
    'ext': {'type': 'combined', 'field': ('vext', 'aext')},
    # 'res' is the smallest non-empty value among height and width, or 0 if both are missing
    'res': {'type': 'multiple', 'field': ('height', 'width'),
            'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},

    # Actual field names
    'format_id': {'type': 'alias', 'field': 'id'},
    'preference': {'type': 'alias', 'field': 'ie_pref'},
    'language_preference': {'type': 'alias', 'field': 'lang'},
    'source_preference': {'type': 'alias', 'field': 'source'},
    'protocol': {'type': 'alias', 'field': 'proto'},
    'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
    'audio_channels': {'type': 'alias', 'field': 'channels'},

    # Deprecated aliases: still resolved, but evaluate_params() reports them
    'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
    'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
    'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
    'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
    'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
    'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
    'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
    'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
    'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
    'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
    'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
    'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
    'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
    'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
    'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
    'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
    'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
    'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
    'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
    'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
}
5892 def __init__(self
, ydl
, field_preference
):
5895 self
.evaluate_params(self
.ydl
.params
, field_preference
)
5896 if ydl
.params
.get('verbose'):
5897 self
.print_verbose_info(self
.ydl
.write_debug
)
5899 def _get_field_setting(self
, field
, key
):
5900 if field
not in self
.settings
:
5901 if key
in ('forced', 'priority'):
5903 self
.ydl
.deprecated_feature(f
'Using arbitrary fields ({field}) for format sorting is '
5904 'deprecated and may be removed in a future version')
5905 self
.settings
[field
] = {}
5906 propObj
= self
.settings
[field
]
5907 if key
not in propObj
:
5908 type = propObj
.get('type')
5910 default
= 'preference' if type == 'extractor' else (field
,) if type in ('combined', 'multiple') else field
5911 elif key
== 'convert':
5912 default
= 'order' if type == 'ordered' else 'float_string' if field
else 'ignore'
5914 default
= {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}
.get(key
, None)
5915 propObj
[key
] = default
5918 def _resolve_field_value(self
, field
, value
, convertNone
=False):
5923 value
= value
.lower()
5924 conversion
= self
._get
_field
_setting
(field
, 'convert')
5925 if conversion
== 'ignore':
5927 if conversion
== 'string':
5929 elif conversion
== 'float_none':
5930 return float_or_none(value
)
5931 elif conversion
== 'bytes':
5932 return parse_bytes(value
)
5933 elif conversion
== 'order':
5934 order_list
= (self
._use
_free
_order
and self
._get
_field
_setting
(field
, 'order_free')) or self
._get
_field
_setting
(field
, 'order')
5935 use_regex
= self
._get
_field
_setting
(field
, 'regex')
5936 list_length
= len(order_list
)
5937 empty_pos
= order_list
.index('') if '' in order_list
else list_length
+ 1
5938 if use_regex
and value
is not None:
5939 for i
, regex
in enumerate(order_list
):
5940 if regex
and re
.match(regex
, value
):
5941 return list_length
- i
5942 return list_length
- empty_pos
# not in list
5943 else: # not regex or value = None
5944 return list_length
- (order_list
.index(value
) if value
in order_list
else empty_pos
)
5946 if value
.isnumeric():
5949 self
.settings
[field
]['convert'] = 'string'
def evaluate_params(self, params, sort_extractor):
    """Resolve the final field order and the per-field limits.

    Sort tokens are processed in this order: forced default fields, priority
    default fields (skipped when the 'format_sort_force' param is set), the
    user's 'format_sort' tokens, the extractor's tokens (`sort_extractor`),
    and finally all default tokens.  The first occurrence of a field wins.

    Raises ExtractorError for a token that does not match `self.regex`.
    """
    self._use_free_order = params.get('prefer_free_formats', False)
    self._sort_user = params.get('format_sort', [])
    self._sort_extractor = sort_extractor

    def register(field, reverse, closest, limit_text):
        # Record `field` once; later mentions of the same field are ignored
        field = field.lower()
        if field in self._order:
            return
        self._order.append(field)
        limit = self._resolve_field_value(field, limit_text)
        self.settings.setdefault(field, {}).update({
            'reverse': reverse,
            # "closest" is meaningless without a limit to be close to
            'closest': False if limit is None else closest,
            'limit_text': limit_text,
            'limit': limit,
        })

    forced = tuple(name for name in self.default if self._get_field_setting(name, 'forced'))
    if params.get('format_sort_force', False):
        prioritized = tuple()
    else:
        prioritized = tuple(name for name in self.default if self._get_field_setting(name, 'priority'))
    sort_list = forced + prioritized + tuple(self._sort_user) + tuple(sort_extractor) + self.default

    for item in sort_list:
        match = re.match(self.regex, item)
        if match is None:
            raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
        field = match.group('field')
        if field is None:
            # Blank token
            continue

        if self._get_field_setting(field, 'type') == 'alias':
            alias, field = field, self._get_field_setting(field, 'field')
            if self._get_field_setting(alias, 'deprecated'):
                self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
                                            f'be removed in a future version. Please use {field} instead')

        reverse = match.group('reverse') is not None
        closest = match.group('separator') == '~'
        limit_text = match.group('limit')

        has_limit = limit_text is not None
        has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
        has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')

        if has_multiple_fields:
            fields = self._get_field_setting(field, 'field')
        else:
            fields = (field,)

        if has_multiple_limits:
            limits = limit_text.split(':')
        elif has_limit:
            limits = (limit_text,)
        else:
            limits = tuple()
        limit_count = len(limits)

        for index, name in enumerate(fields):
            if index < limit_count:
                item_limit = limits[index]
            elif has_limit and not has_multiple_limits:
                # One shared limit applies to every sub-field
                item_limit = limits[0]
            else:
                item_limit = None
            register(name, reverse, closest, item_limit)
def print_verbose_info(self, write_debug):
    """Report the user's, the extractor's and the effective sort order.

    Each visible field of the effective order is written as
    "[+]field[<sep>limit_text(limit)]", where <sep> is "~" for
    closest-match limits and ":" otherwise.
    """
    if self._sort_user:
        write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
    if self._sort_extractor:
        write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))

    described = []
    for field in self._order:
        if not self._get_field_setting(field, 'visible'):
            continue
        prefix = '+' if self._get_field_setting(field, 'reverse') else ''
        if self._get_field_setting(field, 'limit_text') is not None:
            separator = '~' if self._get_field_setting(field, 'closest') else ':'
            suffix = '%s%s(%s)' % (
                separator,
                self._get_field_setting(field, 'limit_text'),
                self._get_field_setting(field, 'limit'))
        else:
            suffix = ''
        described.append('%s%s%s' % (prefix, field, suffix))
    write_debug('Formats sorted by: %s' % ', '.join(described))
def _calculate_field_preference_from_value(self, format, field, type, value):
    """Turn one field value into a sort key tuple (larger compares as better).

    `type` is the field type: 'extractor', 'boolean' and 'ordered' values are
    normalised first; any other type is used as given.  The result is:
      (-10, 0)           value missing
      (1, value, 0)      non-numeric value (sorted above numeric ones)
      (0, ..., ...)      numeric value within the limit / closest to the limit
      (-1, value, 0)     numeric value excluded by the limit
    """
    reverse = self._get_field_setting(field, 'reverse')
    closest = self._get_field_setting(field, 'closest')
    limit = self._get_field_setting(field, 'limit')

    if type == 'extractor':
        ceiling = self._get_field_setting(field, 'max')
        if value is None or (ceiling is not None and value >= ceiling):
            value = -1
    elif type == 'boolean':
        allowed = self._get_field_setting(field, 'in_list')
        denied = self._get_field_setting(field, 'not_in_list')
        accepted = (allowed is None or value in allowed) and (denied is None or value not in denied)
        value = 0 if accepted else -1
    elif type == 'ordered':
        value = self._resolve_field_value(field, value, True)

    # try to convert to number
    numeric = float_or_none(value, default=self._get_field_setting(field, 'default'))
    is_num = self._get_field_setting(field, 'convert') != 'string' and numeric is not None
    if is_num:
        value = numeric

    if value is None:
        return (-10, 0)
    if not is_num:
        # if a field has mixed strings and numbers, strings are sorted higher
        return (1, value, 0)
    if closest:
        # Nearest to the limit wins; the last element breaks ties by side
        return (0, -abs(value - limit), value - limit if reverse else limit - value)
    if not reverse and (limit is None or value <= limit):
        return (0, value, 0)
    if limit is None or (reverse and value == limit) or value > limit:
        return (0, -value, 0)
    return (-1, value, 0)
6050 def _calculate_field_preference(self
, format
, field
):
6051 type = self
._get
_field
_setting
(field
, 'type') # extractor, boolean, ordered, field, multiple
6052 get_value
= lambda f
: format
.get(self
._get
_field
_setting
(f
, 'field'))
6053 if type == 'multiple':
6054 type = 'field' # Only 'field' is allowed in multiple for now
6055 actual_fields
= self
._get
_field
_setting
(field
, 'field')
6057 value
= self
._get
_field
_setting
(field
, 'function')(get_value(f
) for f
in actual_fields
)
6059 value
= get_value(field
)
6060 return self
._calculate
_field
_preference
_from
_value
(format
, field
, type, value
)
def calculate_preference(self, format):
    """Return the sort key of `format`: one entry per field in the resolved order.

    Missing derived values are filled into `format` in place first:
    'protocol', 'ext', 'video_ext'/'audio_ext', a low 'preference' for
    HEVC-in-FLV, and whichever of 'tbr'/'vbr'/'abr' can be computed from the
    others.
    """
    # Determine missing protocol
    if not format.get('protocol'):
        format['protocol'] = determine_protocol(format)

    # Determine missing ext
    if not format.get('ext') and 'url' in format:
        format['ext'] = determine_ext(format['url'])
    if format.get('vcodec') == 'none':
        format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
        format['video_ext'] = 'none'
    else:
        format['video_ext'] = format['ext']
        format['audio_ext'] = 'none'
    # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'):  # Not supported?
    #    format['preference'] = -1000

    if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
        # HEVC-over-FLV is out-of-spec by FLV's original spec
        # ref. https://trac.ffmpeg.org/ticket/6389
        # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
        format['preference'] = -100

    # Determine missing bitrates
    tbr = format.get('tbr')
    if tbr is None:
        vbr, abr = format.get('vbr'), format.get('abr')
        if vbr is not None and abr is not None:
            format['tbr'] = vbr + abr
    else:
        # The other bitrate may be present with an explicit None value;
        # `.get(key, 0)` would return that None and make the subtraction
        # raise TypeError, so treat None the same as a missing key
        if format.get('vcodec') != 'none' and format.get('vbr') is None:
            format['vbr'] = tbr - (format.get('abr') or 0)
        if format.get('acodec') != 'none' and format.get('abr') is None:
            format['abr'] = tbr - (format.get('vbr') or 0)

    return tuple(self._calculate_field_preference(format, field) for field in self._order)