43 import xml
.etree
.ElementTree
45 from . import traversal
47 from ..compat
import functools
# isort: split
48 from ..compat
import (
49 compat_etree_fromstring
,
51 compat_HTMLParseError
,
55 from ..dependencies
import xattr
57 __name__
= __name__
.rsplit('.', 1)[0] # Pretend to be the parent module
59 # This is not clearly defined otherwise
60 compiled_regex_type
= type(re
.compile(''))
71 ENGLISH_MONTH_NAMES
= [
72 'January', 'February', 'March', 'April', 'May', 'June',
73 'July', 'August', 'September', 'October', 'November', 'December']
76 'en': ENGLISH_MONTH_NAMES
,
78 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
79 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
80 # these follow the genitive grammatical case (dopełniacz)
81 # some websites might be using nominative, which will require another month list
82 # https://en.wikibooks.org/wiki/Polish/Noun_cases
83 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
84 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
87 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
89 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
90 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
91 'EST': -5, 'EDT': -4, # Eastern
92 'CST': -6, 'CDT': -5, # Central
93 'MST': -7, 'MDT': -6, # Mountain
94 'PST': -8, 'PDT': -7 # Pacific
97 # needed for sanitizing filenames in restricted mode
98 ACCENT_CHARS
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
99 itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
100 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
130 '%Y-%m-%d %H:%M:%S.%f',
131 '%Y-%m-%d %H:%M:%S:%f',
134 '%Y-%m-%dT%H:%M:%SZ',
135 '%Y-%m-%dT%H:%M:%S.%fZ',
136 '%Y-%m-%dT%H:%M:%S.%f0Z',
138 '%Y-%m-%dT%H:%M:%S.%f',
141 '%b %d %Y at %H:%M:%S',
143 '%B %d %Y at %H:%M:%S',
147 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
148 DATE_FORMATS_DAY_FIRST
.extend([
159 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
160 DATE_FORMATS_MONTH_FIRST
.extend([
# Signature of a packed-JavaScript payload: captures the trailing argument list
# `}('code',radix,count,'sym|bols'.split('|')` as (code, radix, count, symbol table).
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the payload of a <script type="application/ld+json"> block into the
# named group `json_ld` (a JSON object or array, matched non-greedily,
# case-insensitive, dot matches newlines).
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
# An unsigned decimal number, optionally with a fractional part (e.g. "12" or "12.5").
NUMBER_RE = r'\d+(?:\.\d+)?'
175 def preferredencoding():
176 """Get preferred encoding.
178 Returns the best encoding scheme for the system, based on
179 locale.getpreferredencoding() and some further tweaks.
182 pref = locale.getpreferredencoding()
190 def write_json_file(obj, fn):
191 """ Encode obj as JSON and write it to fn, atomically if possible """
193 tf = tempfile.NamedTemporaryFile(
194 prefix=f'{os.path.basename(fn)}
.', dir=os.path.dirname(fn),
195 suffix='.tmp
', delete=False, mode='w
', encoding='utf
-8')
199 json.dump(obj, tf, ensure_ascii=False)
200 if sys.platform == 'win32
':
201 # Need to remove existing file on Windows, else os.rename raises
202 # WindowsError or FileExistsError.
203 with contextlib.suppress(OSError):
205 with contextlib.suppress(OSError):
208 os.chmod(tf.name, 0o666 & ~mask)
209 os.rename(tf.name, fn)
211 with contextlib.suppress(OSError):
def find_xpath_attr(node, xpath, key, val=None):
    """Find the first node matching xpath[@key] (or xpath[@key=val] when val is given)."""
    # Reject keys that could break out of the handcrafted XPath predicate.
    assert re.match(r'^[a-zA-Z_-]+$', key)
    if val is None:
        predicate = '[@%s]' % key
    else:
        predicate = f"[@{key}='{val}']"
    return node.find(xpath + predicate)
222 # On python2.6 the xml.etree.ElementTree.Element methods don't support
223 # the namespace parameter
226 def xpath_with_ns(path
, ns_map
):
227 components
= [c
.split(':') for c
in path
.split('/')]
231 replaced
.append(c
[0])
234 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
235 return '/'.join(replaced
)
238 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
239 def _find_xpath(xpath
):
240 return node
.find(xpath
)
242 if isinstance(xpath
, str):
243 n
= _find_xpath(xpath
)
251 if default
is not NO_DEFAULT
:
254 name
= xpath
if name
is None else name
255 raise ExtractorError('Could not find XML element %s' % name
)
261 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
262 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
263 if n
is None or n
== default
:
266 if default
is not NO_DEFAULT
:
269 name
= xpath
if name
is None else name
270 raise ExtractorError('Could not find XML element\'s text %s' % name
)
276 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
277 n
= find_xpath_attr(node
, xpath
, key
)
279 if default
is not NO_DEFAULT
:
282 name
= f
'{xpath}[@{key}]' if name
is None else name
283 raise ExtractorError('Could not find XML attribute %s' % name
)
def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin convenience wrapper over the generic attribute lookup.
    return get_element_by_attribute('id', id, html, **kwargs)
def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    # Same as get_element_by_id, but yields the whole element instead of its content.
    return get_element_html_by_attribute('id', id, html, **kwargs)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return next(iter(matches), None)
def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_html_by_class(class_name, html)
    return next(iter(matches), None)
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag carrying the given attribute/value pair, or None."""
    found = get_elements_by_attribute(attribute, value, html, **kwargs)
    if not found:
        return None
    return found[0]
def get_element_html_by_attribute(attribute, value, html, **kargs):
    """Return the html of the first tag carrying the given attribute/value pair, or None."""
    found = get_elements_html_by_attribute(attribute, value, html, **kargs)
    if not found:
        return None
    return found[0]
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # NOTE(review): **kargs is accepted but not forwarded — kept as-is for interface compatibility.
    # The lookaround pattern matches class_name as one whitespace-delimited token
    # inside the attribute value.
    pattern = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', pattern, html, escape_value=False)
def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # Matches class_name as one whitespace-delimited token inside the attribute value.
    pattern = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_html_by_attribute('class', pattern, html, escape_value=False)
def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [text for text, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [outer for _, outer in get_elements_text_and_html_by_attribute(*args, **kwargs)]
345 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w
:.-]+', escape_value=True):
347 Return the text (content) and the html (whole) of the tag with the specified
348 attribute in the passed HTML document
353 quote = '' if re.match(r'''[\s"'`
=<>]''', value) else '?'
355 value = re.escape(value) if escape_value else value
357 partial_element_re = rf'''(?x
)
359 (?
:\
s(?
:[^
>"']|"[^
"]*"|
'[^']*')*)?
360 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
363 for m in re.finditer(partial_element_re, html):
364 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
367 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P
<content
>.*)(?P
=q
)$
', r'\g
<content
>', content, flags=re.DOTALL)),
372 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
374 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
375 closing tag for the first opening tag it has encountered, and can be used
379 class HTMLBreakOnClosingTagException(Exception):
383 self.tagstack = collections.deque()
384 html.parser.HTMLParser.__init__(self)
389 def __exit__(self, *_):
393 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
394 # so data remains buffered; we no longer have any interest in it, thus
395 # override this method to discard it
398 def handle_starttag(self, tag, _):
399 self.tagstack.append(tag)
401 def handle_endtag(self, tag):
402 if not self.tagstack:
403 raise compat_HTMLParseError('no tags
in the stack
')
405 inner_tag = self.tagstack.pop()
409 raise compat_HTMLParseError(f'matching opening tag
for closing {tag} tag
not found
')
410 if not self.tagstack:
411 raise self.HTMLBreakOnClosingTagException()
414 # XXX: This should be far less strict
415 def get_element_text_and_html_by_tag(tag, html):
417 For the first element with the specified tag in the passed HTML document
418 return its' content (text
) and the whole
element (html
)
420 def find_or_raise(haystack, needle, exc):
422 return haystack.index(needle)
425 closing_tag = f'</{tag}>'
426 whole_start = find_or_raise(
427 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
428 content_start = find_or_raise(
429 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
430 content_start += whole_start + 1
431 with HTMLBreakOnClosingTagParser() as parser:
432 parser.feed(html[whole_start:content_start])
433 if not parser.tagstack or parser.tagstack[0] != tag:
434 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
435 offset = content_start
436 while offset < len(html):
437 next_closing_tag_start = find_or_raise(
438 html[offset:], closing_tag,
439 compat_HTMLParseError(f'closing {tag} tag not found'))
440 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
442 parser.feed(html[offset:offset + next_closing_tag_end])
443 offset += next_closing_tag_end
444 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
445 return html[content_start:offset + next_closing_tag_start], \
446 html[whole_start:offset + next_closing_tag_end]
447 raise compat_HTMLParseError('unexpected end of html')
450 class HTMLAttributeParser(html.parser.HTMLParser):
451 """Trivial HTML parser to gather the attributes
for a single element
"""
455 html.parser.HTMLParser.__init__(self)
457 def handle_starttag(self, tag, attrs):
458 self.attrs = dict(attrs)
459 raise compat_HTMLParseError('done')
462 class HTMLListAttrsParser(html.parser.HTMLParser):
463 """HTML parser to gather the attributes
for the elements of a
list"""
466 html.parser.HTMLParser.__init__(self)
470 def handle_starttag(self, tag, attrs):
471 if tag == 'li' and self._level == 0:
472 self.items.append(dict(attrs))
475 def handle_endtag(self, tag):
479 def extract_attributes(html_element):
480 """Given a string
for an HTML element such
as
482 a
="foo" B
="bar" c
="&98;az" d
=boz
483 empty
= noval entity
="&"
486 Decode
and return a dictionary of attributes
.
488 'a': 'foo', 'b': 'bar', c
: 'baz', d
: 'boz',
489 'empty': '', 'noval': None, 'entity': '&',
490 'sq': '"', 'dq': '\''
493 parser = HTMLAttributeParser()
494 with contextlib.suppress(compat_HTMLParseError):
495 parser.feed(html_element)
500 def parse_list(webpage):
501 """Given a string
for an series of HTML
<li
> elements
,
502 return a dictionary of their attributes
"""
503 parser = HTMLListAttrsParser()
509 def clean_html(html):
510 """Clean an HTML snippet into a readable string
"""
512 if html is None: # Convenience for sanitizing descriptions etc.
515 html = re.sub(r'\s+', ' ', html)
516 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
517 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
519 html = re.sub('<.*?>', '', html)
520 # Replace html entities
521 html = unescapeHTML(html)
525 class LenientJSONDecoder(json.JSONDecoder):
527 def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
528 self.transform_source, self.ignore_extra = transform_source, ignore_extra
529 self._close_attempts = 2 * close_objects
530 super().__init__(*args, **kwargs)
533 def _close_object(err):
534 doc = err.doc[:err.pos]
535 # We need to add comma first to get the correct error message
536 if err.msg.startswith('Expecting \',\''):
538 elif not doc.endswith(','):
541 if err.msg.startswith('Expecting property name'):
542 return doc[:-1] + '}'
543 elif err.msg.startswith('Expecting value'):
544 return doc[:-1] + ']'
547 if self.transform_source:
548 s = self.transform_source(s)
549 for attempt in range(self._close_attempts + 1):
551 if self.ignore_extra:
552 return self.raw_decode(s.lstrip())[0]
553 return super().decode(s)
554 except json.JSONDecodeError as e:
557 elif attempt < self._close_attempts:
558 s = self._close_object(e)
561 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
562 assert False, 'Too many attempts to decode JSON'
565 def sanitize_open(filename, open_mode):
566 """Try to
open the given filename
, and slightly tweak it
if this fails
.
568 Attempts to
open the given filename
. If this fails
, it tries to change
569 the filename slightly
, step by step
, until it
's either able to open it
570 or it fails and raises a final exception, like the standard open()
573 It returns the tuple (stream, definitive_file_name).
576 if sys.platform == 'win32
':
579 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
580 with contextlib.suppress(io.UnsupportedOperation):
581 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
582 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
584 for attempt in range(2):
587 if sys.platform == 'win32
':
588 # FIXME: An exclusive lock also locks the file from being read.
589 # Since windows locks are mandatory, don't lock the
file on
windows (for now
).
590 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
591 raise LockingUnsupportedError()
592 stream
= locked_file(filename
, open_mode
, block
=False).__enter
__()
594 stream
= open(filename
, open_mode
)
595 return stream
, filename
596 except OSError as err
:
597 if attempt
or err
.errno
in (errno
.EACCES
,):
599 old_filename
, filename
= filename
, sanitize_path(filename
)
600 if old_filename
== filename
:
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp (None if unparseable)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
613 def sanitize_filename(s
, restricted
=False, is_id
=NO_DEFAULT
):
614 """Sanitizes a string so it could be used as part of a filename.
615 @param restricted Use a stricter subset of allowed characters
616 @param is_id Whether this is an ID that should be kept unchanged if possible.
617 If unset, yt-dlp's new sanitization rules are in effect
622 def replace_insane(char
):
623 if restricted
and char
in ACCENT_CHARS
:
624 return ACCENT_CHARS
[char
]
625 elif not restricted
and char
== '\n':
627 elif is_id
is NO_DEFAULT
and not restricted
and char
in '"*:<>?|/\\':
628 # Replace with their full-width unicode counterparts
629 return {'/': '\u29F8', '\\': '\u29f9'}
.get(char
, chr(ord(char
) + 0xfee0))
630 elif char
== '?' or ord(char
) < 32 or ord(char
) == 127:
633 return '' if restricted
else '\''
635 return '\0_\0-' if restricted
else '\0 \0-'
636 elif char
in '\\/|*<>':
638 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace() or ord(char
) > 127):
642 # Replace look-alike Unicode glyphs
643 if restricted
and (is_id
is NO_DEFAULT
or not is_id
):
644 s
= unicodedata
.normalize('NFKC', s
)
645 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) # Handle timestamps
646 result
= ''.join(map(replace_insane
, s
))
647 if is_id
is NO_DEFAULT
:
648 result
= re
.sub(r
'(\0.)(?:(?=\1)..)+', r
'\1', result
) # Remove repeated substitute chars
649 STRIP_RE
= r
'(?:\0.|[ _-])*'
650 result
= re
.sub(f
'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result
) # Remove substitute chars from start/end
651 result
= result
.replace('\0', '') or '_'
654 while '__' in result
:
655 result
= result
.replace('__', '_')
656 result
= result
.strip('_')
657 # Common case of "Foreign band name - English song title"
658 if restricted
and result
.startswith('-_'):
660 if result
.startswith('-'):
661 result
= '_' + result
[len('-'):]
662 result
= result
.lstrip('.')
668 def sanitize_path(s
, force
=False):
669 """Sanitizes and normalizes path on Windows"""
670 # XXX: this handles drive relative paths (c:sth) incorrectly
671 if sys
.platform
== 'win32':
673 drive_or_unc
, _
= os
.path
.splitdrive(s
)
679 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
683 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
684 for path_part
in norm_path
]
686 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
687 elif force
and s
and s
[0] == os
.path
.sep
:
688 sanitized_path
.insert(0, os
.path
.sep
)
689 # TODO: Fix behavioral differences <3.12
690 # The workaround using `normpath` only superficially passes tests
691 # Ref: https://github.com/python/cpython/pull/100351
692 return os
.path
.normpath(os
.path
.join(*sanitized_path
))
695 def sanitize_url(url
, *, scheme
='http'):
696 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
697 # the number of unwanted failures due to missing protocol
700 elif url
.startswith('//'):
701 return f
'{scheme}:{url}'
702 # Fix some common typos seen so far
704 # https://github.com/ytdl-org/youtube-dl/issues/15649
705 (r
'^httpss://', r
'https://'),
706 # https://bx1.be/lives/direct-tv/
707 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
709 for mistake
, fixup
in COMMON_TYPOS
:
710 if re
.match(mistake
, url
):
711 return re
.sub(mistake
, fixup
, url
)
715 def extract_basic_auth(url
):
716 parts
= urllib
.parse
.urlsplit(url
)
717 if parts
.username
is None:
719 url
= urllib
.parse
.urlunsplit(parts
._replace
(netloc
=(
720 parts
.hostname
if parts
.port
is None
721 else '%s:%d' % (parts
.hostname
, parts
.port
))))
722 auth_payload
= base64
.b64encode(
723 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode())
724 return url
, f
'Basic {auth_payload.decode()}'
728 """Expand shell variables and ~"""
729 return os
.path
.expandvars(compat_expanduser(s
))
732 def orderedSet(iterable
, *, lazy
=False):
733 """Remove all duplicates from the input iterable"""
735 seen
= [] # Do not use set since the items can be unhashable
741 return _iter() if lazy
else list(_iter())
744 def _htmlentity_transform(entity_with_semicolon
):
745 """Transforms an HTML entity to a character."""
746 entity
= entity_with_semicolon
[:-1]
748 # Known non-numeric HTML entity
749 if entity
in html
.entities
.name2codepoint
:
750 return chr(html
.entities
.name2codepoint
[entity
])
752 # TODO: HTML5 allows entities without a semicolon.
753 # E.g. 'Éric' should be decoded as 'Éric'.
754 if entity_with_semicolon
in html
.entities
.html5
:
755 return html
.entities
.html5
[entity_with_semicolon
]
757 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
759 numstr
= mobj
.group(1)
760 if numstr
.startswith('x'):
762 numstr
= '0%s' % numstr
765 # See https://github.com/ytdl-org/youtube-dl/issues/7518
766 with contextlib
.suppress(ValueError):
767 return chr(int(numstr
, base
))
769 # Unknown entity in name, return its literal representation
770 return '&%s;' % entity
776 assert isinstance(s
, str)
779 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
def escapeHTML(text):
    """Escape &, <, >, double and single quotes for safe embedding in HTML.

    The entity targets had degraded into identity replacements (e.g.
    `.replace('&', '&')`), turning the function into a no-op; the proper
    entity references are restored here.
    """
    # '&' must be replaced first so entities produced below are not re-escaped.
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
class netrc_from_content(netrc.netrc):
    """A netrc.netrc that parses credentials from an in-memory string instead of a file."""

    def __init__(self, content):
        # Deliberately skip netrc.netrc.__init__, which insists on reading a real file.
        self.hosts, self.macros = {}, {}
        with io.StringIO(content) as stream:
            self._parse('-', stream, False)
800 class Popen(subprocess
.Popen
):
801 if sys
.platform
== 'win32':
802 _startupinfo
= subprocess
.STARTUPINFO()
803 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
808 def _fix_pyinstaller_ld_path(env
):
809 """Restore LD_LIBRARY_PATH when using PyInstaller
810 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
811 https://github.com/yt-dlp/yt-dlp/issues/4573
813 if not hasattr(sys
, '_MEIPASS'):
817 orig
= env
.get(f
'{key}_ORIG')
823 _fix('LD_LIBRARY_PATH') # Linux
824 _fix('DYLD_LIBRARY_PATH') # macOS
826 def __init__(self
, args
, *remaining
, env
=None, text
=False, shell
=False, **kwargs
):
828 env
= os
.environ
.copy()
829 self
._fix
_pyinstaller
_ld
_path
(env
)
831 self
.__text
_mode
= kwargs
.get('encoding') or kwargs
.get('errors') or text
or kwargs
.get('universal_newlines')
833 kwargs
['universal_newlines'] = True # For 3.6 compatibility
834 kwargs
.setdefault('encoding', 'utf-8')
835 kwargs
.setdefault('errors', 'replace')
837 if shell
and compat_os_name
== 'nt' and kwargs
.get('executable') is None:
838 if not isinstance(args
, str):
839 args
= ' '.join(compat_shlex_quote(a
) for a
in args
)
841 args
= f
'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
843 super().__init
__(args
, *remaining
, env
=env
, shell
=shell
, **kwargs
, startupinfo
=self
._startupinfo
)
846 comspec
= os
.environ
.get('ComSpec') or os
.path
.join(
847 os
.environ
.get('SystemRoot', ''), 'System32', 'cmd.exe')
848 if os
.path
.isabs(comspec
):
850 raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
852 def communicate_or_kill(self
, *args
, **kwargs
):
854 return self
.communicate(*args
, **kwargs
)
855 except BaseException
: # Including KeyboardInterrupt
856 self
.kill(timeout
=None)
859 def kill(self
, *, timeout
=0):
862 self
.wait(timeout
=timeout
)
865 def run(cls
, *args
, timeout
=None, **kwargs
):
866 with cls(*args
, **kwargs
) as proc
:
867 default
= '' if proc
.__text
_mode
else b
''
868 stdout
, stderr
= proc
.communicate_or_kill(timeout
=timeout
)
869 return stdout
or default
, stderr
or default
, proc
.returncode
def encodeArgument(s):
    """Coerce a (possibly byte-string) argument to str.

    Legacy callers may still hand us byte strings; those are decoded as ASCII.
    """
    if isinstance(s, str):
        return s
    return s.decode('ascii')
# Simple value type for a duration split into clock components.
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into (hours, minutes, seconds, milliseconds)."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
889 def formatSeconds(secs
, delim
=':', msec
=False):
890 time
= timetuple_from_msec(secs
* 1000)
892 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
894 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
896 ret
= '%d' % time
.seconds
897 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
def bug_reports_message(before=';'):
    """Return the standard bug-report request, glued onto *before* with sensible punctuation."""
    from ..update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    # Capitalize the message when it starts a fresh sentence.
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    if not before:
        return msg
    return f'{before} {msg}'
913 class YoutubeDLError(Exception):
914 """Base exception for YoutubeDL errors."""
917 def __init__(self
, msg
=None):
920 elif self
.msg
is None:
921 self
.msg
= type(self
).__name
__
922 super().__init
__(self
.msg
)
925 class ExtractorError(YoutubeDLError
):
926 """Error during info extraction."""
928 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
929 """ tb, if given, is the original traceback (so that it can be printed out).
930 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
932 from ..networking
.exceptions
import network_exceptions
933 if sys
.exc_info()[0] in network_exceptions
:
936 self
.orig_msg
= str(msg
)
938 self
.expected
= expected
940 self
.video_id
= video_id
942 self
.exc_info
= sys
.exc_info() # preserve original exception
943 if isinstance(self
.exc_info
[1], ExtractorError
):
944 self
.exc_info
= self
.exc_info
[1].exc_info
945 super().__init
__(self
.__msg
)
950 format_field(self
.ie
, None, '[%s] '),
951 format_field(self
.video_id
, None, '%s: '),
953 format_field(self
.cause
, None, ' (caused by %r)'),
954 '' if self
.expected
else bug_reports_message()))
956 def format_traceback(self
):
957 return join_nonempty(
958 self
.traceback
and ''.join(traceback
.format_tb(self
.traceback
)),
959 self
.cause
and ''.join(traceback
.format_exception(None, self
.cause
, self
.cause
.__traceback
__)[1:]),
962 def __setattr__(self
, name
, value
):
963 super().__setattr
__(name
, value
)
964 if getattr(self
, 'msg', None) and name
not in ('msg', 'args'):
965 self
.msg
= self
.__msg
or type(self
).__name
__
966 self
.args
= (self
.msg
, ) # Cannot be property
969 class UnsupportedError(ExtractorError
):
970 def __init__(self
, url
):
972 'Unsupported URL: %s' % url
, expected
=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # A geo-block is an expected condition, not a bug in yt-dlp.
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries
class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        # Not being live is an expected condition, not a bug in yt-dlp.
        kwargs['expected'] = True
        super().__init__(msg or 'The channel is not currently live', **kwargs)
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors.
    """

    def __init__(self, msg, exc_info=None):
        """exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info())."""
        super().__init__(msg)
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Append the offending filename; the f-string previously contained
            # no placeholder and silently discarded the argument.
            self.msg += f': {filename}'
        super().__init__(self.msg)
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """ --break-match-filter triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected
1075 class ThrottledDownload(ReExtractInfo
):
1076 """ Download speed below --throttled-rate. """
1077 msg
= 'The download speed is below throttle limit'
1080 super().__init
__(self
.msg
, expected
=False)
1083 class UnavailableVideoError(YoutubeDLError
):
1084 """Unavailable Format exception.
1086 This exception will be thrown when a video is requested
1087 in a format that is not available for that video.
1089 msg
= 'Unable to download video'
1091 def __init__(self
, err
=None):
1093 self
.msg
+= f
': {err}'
1094 super().__init
__(self
.msg
)
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both byte counts are kept so callers can decide whether to retry/resume.
        self.downloaded = downloaded
        self.expected = expected
1112 class XAttrMetadataError(YoutubeDLError
):
1113 def __init__(self
, code
=None, msg
='Unknown error'):
1114 super().__init
__(msg
)
1118 # Parsing code and msg
1119 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
1120 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
1121 self
.reason
= 'NO_SPACE'
1122 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
1123 self
.reason
= 'VALUE_TOO_LONG'
1125 self
.reason
= 'NOT_SUPPORTED'
1128 class XAttrUnavailableError(YoutubeDLError
):
def is_path_like(f):
    """Whether *f* is usable as a filesystem path (str, bytes or os.PathLike)."""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1136 def extract_timezone(date_str
):
1139 ^.{8,}? # >=8 char non-TZ prefix, if present
1140 (?P<tz>Z| # just the UTC Z, or
1141 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1142 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1143 [ ]? # optional space
1144 (?P<sign>\+|-) # +/-
1145 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1149 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1150 timezone
= TIMEZONE_NAMES
.get(m
and m
.group('tz').strip())
1151 if timezone
is not None:
1152 date_str
= date_str
[:-len(m
.group('tz'))]
1153 timezone
= datetime
.timedelta(hours
=timezone
or 0)
1155 date_str
= date_str
[:-len(m
.group('tz'))]
1156 if not m
.group('sign'):
1157 timezone
= datetime
.timedelta()
1159 sign
= 1 if m
.group('sign') == '+' else -1
1160 timezone
= datetime
.timedelta(
1161 hours
=sign
* int(m
.group('hours')),
1162 minutes
=sign
* int(m
.group('minutes')))
1163 return timezone
, date_str
1166 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
1167 """ Return a UNIX timestamp from the given date """
1169 if date_str
is None:
1172 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
1174 if timezone
is None:
1175 timezone
, date_str
= extract_timezone(date_str
)
1177 with contextlib
.suppress(ValueError):
1178 date_format
= f
'%Y-%m-%d{delimiter}%H:%M:%S'
1179 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
1180 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Select the strptime format list matching the expected day/month ordering."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        # Fall back to RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Best-effort parse of a free-form date string into a UNIX timestamp."""
    if not isinstance(date_str, str):
        return None

    # Strip commas, pipes and weekday names, collapse whitespace
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())

    # Last resort: RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL; `default_ext` when undeterminable."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif candidate.rstrip('/') in KNOWN_EXTENSIONS:
        return candidate.rstrip('/')
    else:
        return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the output filename for a subtitle track: name.<lang>.<format>."""
    return replace_extension(filename, '.'.join((sub_lang, sub_format)), expected_real_ext)
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.

    Format: (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is not None:
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # timedelta cannot express months/years; use calendar arithmetic
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
        raise ValueError(f'Invalid date format "{date_str}"')
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months, clamping the day."""
    total_month = dt.month + months - 1
    new_year = dt.year + total_month // 12
    new_month = total_month % 12 + 1
    # Clamp to the last valid day of the target month (e.g. Jan 31 + 1mo -> Feb 28/29)
    new_day = min(dt.day, calendar.monthrange(new_year, new_month)[1])
    return dt.replace(new_year, new_month, new_day)
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    # Round-half-up in units of `precision`
    roundto = lambda x, n: ((x + n / 2) // n) * n
    timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings not matching YYYYMMDD are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
1353 """Represents a time interval between two dates"""
def __init__(self, start=None, end=None):
    """start and end must be strings in the format accepted by date"""
    # Missing bounds default to the widest possible date range
    self.start = date_from_str(start, strict=True) if start is not None else datetime.datetime.min.date()
    self.end = date_from_str(end, strict=True) if end is not None else datetime.datetime.max.date()
    if self.start > self.end:
        raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
1370 """Returns a range that only contains the given day"""
1371 return cls(day
, day
)
def __contains__(self, date):
    """Check if the date is in the range"""
    if not isinstance(date, datetime.date):
        date = date_from_str(date)
    return self.start <= date <= self.end
def __repr__(self):
    # Mirrors the constructor call, e.g. module.DateRange('2020-01-01', '2020-01-31')
    return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'
def __eq__(self, other):
    """Two DateRanges are equal when both bounds match."""
    if not isinstance(other, DateRange):
        return False
    return self.start == other.start and self.end == other.end
def system_identifier():
    """Return a human-readable description of the Python/OS/SSL environment."""
    python_implementation = platform.python_implementation()
    if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    libc_ver = []
    with contextlib.suppress(OSError):  # We may not have access to the executable
        libc_ver = platform.libc_ver()

    return 'Python %s (%s %s %s) - %s (%s%s)' % (
        platform.python_version(),
        python_implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
def get_windows_version():
    ''' Get Windows version. returns () if it's not running on Windows '''
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
def write_string(s, out=None, encoding=None):
    """Write `s` to `out` (default: stderr), encoding if the stream needs bytes."""
    assert isinstance(s, str)
    out = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not out:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        # Prevent the Windows console from swallowing ANSI state across newlines
        s = re.sub(r'([\r\n]+)', r' \1', s)

    enc, buffer = None, out
    if 'b' in getattr(out, 'mode', ''):
        enc = encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        buffer = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()

    buffer.write(s.encode(enc, 'ignore') if enc else s)
    out.flush()
# TODO: Use global logger
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """Emit a deprecation message once per unique `msg` (CLI) or a DeprecationWarning (API)."""
    from .. import _IN_CLI
    if _IN_CLI:
        if msg in deprecation_warning._cache:
            return
        deprecation_warning._cache.add(msg)
        if printer:
            return printer(f'{msg}{bug_reports_message()}', **kwargs)
        return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
    else:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)


# Per-process de-duplication cache for CLI warnings
deprecation_warning._cache = set()
def bytes_to_intlist(bs):
    """Convert a bytes-like (or str) sequence into a list of ints."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a list of ints (0-255) into a bytes object."""
    if not xs:
        return b''
    return struct.pack('%dB' % len(xs), *xs)
class LockingUnsupportedError(OSError):
    """Raised when the platform provides no usable file-locking primitive."""
    msg = 'File locking is not supported'

    def __init__(self):
        super().__init__(self.msg)
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.WinDLL('kernel32')
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            with contextlib.suppress(OSError):
                return fcntl.flock(f, fcntl.LOCK_UN)
            with contextlib.suppress(OSError):
                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking

    except ImportError:

        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError()

        def _unlock_file(f):
            raise LockingUnsupportedError()
def __init__(self, filename, mode, block=True, encoding=None):
    """Open `filename` for locked access; only plain r/a/w (optionally binary) modes."""
    if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
        raise NotImplementedError(mode)
    self.mode, self.block = mode, block

    writable = any(f in mode for f in 'wax+')
    readable = any(f in mode for f in 'r+')
    flags = functools.reduce(operator.ior, (
        getattr(os, 'O_CLOEXEC', 0),  # UNIX only
        getattr(os, 'O_BINARY', 0),  # Windows only
        getattr(os, 'O_NOINHERIT', 0),  # Windows only
        os.O_CREAT if writable else 0,  # O_TRUNC only after locking
        os.O_APPEND if 'a' in mode else 0,
        os.O_EXCL if 'x' in mode else 0,
        os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
    ))

    self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
def __enter__(self):
    """Acquire the lock (shared for read modes, exclusive otherwise), then truncate for 'w'."""
    exclusive = 'r' not in self.mode
    try:
        _lock_file(self.f, exclusive, self.block)
        self.locked = True
    except OSError:
        self.f.close()
        raise
    if 'w' in self.mode:
        try:
            self.f.truncate()
        except OSError as e:
            if e.errno not in (
                errno.ESPIPE,  # Illegal seek - expected for FIFO
                errno.EINVAL,  # Invalid argument - expected for /dev/null
            ):
                raise
    return self
def unlock(self):
    """Release the lock if held; always clears the locked flag."""
    if not self.locked:
        return
    try:
        _unlock_file(self.f)
    finally:
        self.locked = False

def __exit__(self, *_):
    try:
        self.unlock()
    finally:
        self.f.close()

open = __enter__
close = __exit__

def __getattr__(self, attr):
    # Delegate everything else to the underlying file object
    return getattr(self.f, attr)
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
def shell_quote(args):
    """Quote each argument for the shell, decoding bytes filenames first."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(compat_shlex_quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    payload = urllib.parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + payload
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url: return (url, data), or (smug_url, default) if nothing smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    payload = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """ Formats numbers with decimal suffixes like K, M, etc """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        # Use binary (IEC) suffixes: Ki, Mi, ...
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
def format_bytes(bytes):
    """Format a byte count with binary suffixes ('1.00KiB'), or 'N/A'."""
    return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
def lookup_unit_table(unit_table, s, strict=False):
    """Parse '<number> <unit>' using `unit_table` multipliers; None if no match.

    Non-strict mode also accepts ',' as decimal separator and a prefix match."""
    num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = (re.fullmatch if strict else re.match)(
        rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None

    num = float(m.group('num').replace(',', '.'))
    mult = unit_table[m.group('unit')]
    return round(num * mult)
1698 """Parse a string indicating a byte quantity into an integer"""
1699 return lookup_unit_table(
1700 {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])}
,
1701 s
.upper(), strict
=True)
1704 def parse_filesize(s
):
1708 # The lower-case forms are of course incorrect and unofficial,
1709 # but we support those too
1726 'megabytes': 1000 ** 2,
1727 'mebibytes': 1024 ** 2,
1733 'gigabytes': 1000 ** 3,
1734 'gibibytes': 1024 ** 3,
1740 'terabytes': 1000 ** 4,
1741 'tebibytes': 1024 ** 4,
1747 'petabytes': 1000 ** 5,
1748 'pebibytes': 1024 ** 5,
1754 'exabytes': 1000 ** 6,
1755 'exbibytes': 1024 ** 6,
1761 'zettabytes': 1000 ** 7,
1762 'zebibytes': 1024 ** 7,
1768 'yottabytes': 1000 ** 8,
1769 'yobibytes': 1024 ** 8,
1772 return lookup_unit_table(_UNIT_TABLE
, s
)
1779 s
= re
.sub(r
'^[^\d]+\s', '', s
).strip()
1781 if re
.match(r
'^[\d,.]+$', s
):
1782 return str_to_int(s
)
1795 ret
= lookup_unit_table(_UNIT_TABLE
, s
)
1799 mobj
= re
.match(r
'([\d,.]+)(?:$|\s)', s
)
1801 return str_to_int(mobj
.group(1))
def parse_resolution(s, *, lenient=False):
    """Extract width/height from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    if lenient:
        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
    else:
        mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
def parse_bitrate(s):
    """Extract an integer kbps value from a string like '500 kbps'; None otherwise."""
    if not isinstance(s, str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    if mobj:
        return int(mobj.group(1))
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """
    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' by '&amp;', leaving existing XML entities intact."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;', xml_str)
def setproctitle(title):
    """Best-effort: set the process title via glibc prctl(PR_SET_NAME); no-op on failure."""
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode()
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # PR_SET_NAME = 15
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip prefix `start` from `s` if present; None-safe."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Strip suffix `end` from `s` if present; None-safe.

    An empty `end` is a no-op. (The previous `s[:-len(end)]` computed
    `s[:0]` for empty `end`, wrongly returning '' instead of `s`.)
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None
def url_basename(url):
    """Return the last path component of `url` (without query/fragment)."""
    url_path = urllib.parse.urlparse(url).path
    return url_path.strip('/').split('/')[-1]
def base_url(url):
    """Return everything up to and including the last '/' before query/fragment.

    Raises AttributeError if `url` has no http(s) prefix with a path slash."""
    return re.match(r'https?://[^?#]+/', url).group(0)
def urljoin(base, path):
    """Join `base` and `path` like urllib, but None-tolerant and bytes-tolerant."""
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    # Already absolute (scheme-relative or with scheme)?
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Return int(v) * invscale // scale, or `default` when conversion fails."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
def str_or_none(v, default=None):
    """str(v), except None maps to `default`."""
    if v is None:
        return default
    return str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, int):
        return int_str
    if isinstance(int_str, str):
        # Drop thousands separators and '+' before conversion
        return int_or_none(re.sub(r'[,\.\+]', '', int_str))
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or `default` when conversion fails."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Pass through genuine bools only; everything else maps to `default`."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """str.strip() for strings; `default` for anything else."""
    if isinstance(v, str):
        return v.strip()
    return default
def url_or_none(url):
    """Return the stripped URL if it uses a known (or relative) scheme, else None."""
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format a unix timestamp (int/float) or 'YYYYMMDD' string; `default` on failure."""
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Also, datetime.datetime.fromtimestamp breaks for negative timestamps
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
            datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
                               + datetime.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
def parse_duration(s):
    """Parse '[[dd:]hh:]mm:ss[.ms]', ISO-8601-ish or verbose ('3 min') durations to seconds."""
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if m:
        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    if ms:
        ms = ms.replace(':', '.')
    return sum(float(part or 0) * mult for part, mult in (
        (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension: 'a.mp4' -> 'a.<ext>.mp4'.

    If `expected_real_ext` is given and does not match the actual extension,
    `ext` is appended instead: 'a.avi' -> 'a.avi.<ext>'.
    (The previous else-branch returned the literal f'(unknown).{ext}',
    discarding the original filename entirely.)
    """
    name, real_ext = os.path.splitext(filename)
    return (
        f'{name}.{ext}{real_ext}'
        if not expected_real_ext or real_ext[1:] == expected_real_ext
        else f'{filename}.{ext}')
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of `filename` with `ext`.

    When `expected_real_ext` is given but does not match the current
    extension, `ext` is appended to the untouched filename instead."""
    name, real_ext = os.path.splitext(filename)
    base = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
    return f'{base}.{ext}'
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    return exe
def _get_exe_version_output(exe, args):
    """Run `exe args` and return its stdout; None on non-zero exit, False if not runnable."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True,
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if ret:
            return None
    except OSError:
        return False
    return stdout
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from `output`; `unrecognized` when none found."""
    assert isinstance(output, str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m is None:
        return unrecognized
    return m.group(1)
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    out = _get_exe_version_output(exe, args)
    if out is None:
        # Executable ran but exited with an error
        return unrecognized[-1]
    return out and detect_exe_version(out, version_re, unrecognized[0])
def frange(start=0, stop=None, step=1):
    """Float range"""
    if stop is None:
        start, stop = 0, start
    # +1 / -1 / 0 depending on the sign of step
    direction = (step > 0) - (step < 0)
    while direction * start < direction * stop:
        yield start
        start += step
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
class IndexError(IndexError):
    """Raised for out-of-range PagedList access (shadows the builtin on purpose)."""
    pass

def __len__(self):
    # This is only useful for tests
    return len(self.getslice())
def __init__(self, pagefunc, pagesize, use_cache=True):
    """`pagefunc(n)` yields the entries of page n; pages hold `pagesize` entries."""
    self._pagefunc = pagefunc
    self._pagesize = pagesize
    self._pagecount = float('inf')
    self._use_cache = use_cache
    self._cache = {}
def getpage(self, pagenum):
    """Fetch (and optionally cache) the entries of page `pagenum`."""
    page_results = self._cache.get(pagenum)
    if page_results is None:
        # Past the known page count there is nothing to fetch
        page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
    if self._use_cache:
        self._cache[pagenum] = page_results
    return page_results
def getslice(self, start=0, end=None):
    """Materialize entries [start:end] as a list."""
    return list(self._getslice(start, end))
def _getslice(self, start, end):
    """Yield entries in [start, end); subclasses define the paging strategy."""
    raise NotImplementedError('This method must be implemented by subclasses')
def __getitem__(self, idx):
    """Fetch a single entry by non-negative integer index (requires caching)."""
    assert self._use_cache, 'Indexing PagedList requires cache'
    if not isinstance(idx, int) or idx < 0:
        raise TypeError('indices must be non-negative integers')
    entries = self.getslice(idx, idx + 1)
    if not entries:
        raise self.IndexError()
    return entries[0]
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                # Remember where the pages stopped being fetchable
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                # Offset into the first page only
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
2331 class PlaylistEntries
:
2332 MissingEntry
= object()
2333 is_exhausted
= False
2335 def __init__(self
, ydl
, info_dict
):
2338 # _entries must be assigned now since infodict can change during iteration
2339 entries
= info_dict
.get('entries')
2341 raise EntryNotInPlaylist('There are no entries')
2342 elif isinstance(entries
, list):
2343 self
.is_exhausted
= True
2345 requested_entries
= info_dict
.get('requested_entries')
2346 self
.is_incomplete
= requested_entries
is not None
2347 if self
.is_incomplete
:
2348 assert self
.is_exhausted
2349 self
._entries
= [self
.MissingEntry
] * max(requested_entries
or [0])
2350 for i
, entry
in zip(requested_entries
, entries
):
2351 self
._entries
[i
- 1] = entry
2352 elif isinstance(entries
, (list, PagedList
, LazyList
)):
2353 self
._entries
= entries
2355 self
._entries
= LazyList(entries
)
2357 PLAYLIST_ITEMS_RE
= re
.compile(r
'''(?x)
2358 (?P<start>[+-]?\d+)?
2360 (?P<end>[+-]?\d+|inf(?:inite)?)?
2361 (?::(?P<step>[+-]?\d+))?
2365 def parse_playlist_items(cls
, string
):
2366 for segment
in string
.split(','):
2368 raise ValueError('There is two or more consecutive commas')
2369 mobj
= cls
.PLAYLIST_ITEMS_RE
.fullmatch(segment
)
2371 raise ValueError(f
'{segment!r} is not a valid specification')
2372 start
, end
, step
, has_range
= mobj
.group('start', 'end', 'step', 'range')
2373 if int_or_none(step
) == 0:
2374 raise ValueError(f
'Step in {segment!r} cannot be zero')
2375 yield slice(int_or_none(start
), float_or_none(end
), int_or_none(step
)) if has_range
else int(start
)
2377 def get_requested_items(self
):
2378 playlist_items
= self
.ydl
.params
.get('playlist_items')
2379 playlist_start
= self
.ydl
.params
.get('playliststart', 1)
2380 playlist_end
= self
.ydl
.params
.get('playlistend')
2381 # For backwards compatibility, interpret -1 as whole list
2382 if playlist_end
in (-1, None):
2384 if not playlist_items
:
2385 playlist_items
= f
'{playlist_start}:{playlist_end}'
2386 elif playlist_start
!= 1 or playlist_end
:
2387 self
.ydl
.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once
=True)
2389 for index
in self
.parse_playlist_items(playlist_items
):
2390 for i
, entry
in self
[index
]:
2395 # The item may have just been added to archive. Don't break due to it
2396 if not self
.ydl
.params
.get('lazy_playlist'):
2397 # TODO: Add auto-generated fields
2398 self
.ydl
._match
_entry
(entry
, incomplete
=True, silent
=True)
2399 except (ExistingVideoReached
, RejectedVideoReached
):
2402 def get_full_count(self
):
2403 if self
.is_exhausted
and not self
.is_incomplete
:
2405 elif isinstance(self
._entries
, InAdvancePagedList
):
2406 if self
._entries
._pagesize
== 1:
2407 return self
._entries
._pagecount
2409 @functools.cached_property
2411 if isinstance(self
._entries
, list):
2414 entry
= self
._entries
[i
]
2416 entry
= self
.MissingEntry
2417 if not self
.is_incomplete
:
2418 raise self
.IndexError()
2419 if entry
is self
.MissingEntry
:
2420 raise EntryNotInPlaylist(f
'Entry {i + 1} cannot be found')
2425 return type(self
.ydl
)._handle
_extraction
_exceptions
(lambda _
, i
: self
._entries
[i
])(self
.ydl
, i
)
2426 except (LazyList
.IndexError, PagedList
.IndexError):
2427 raise self
.IndexError()
2430 def __getitem__(self
, idx
):
2431 if isinstance(idx
, int):
2432 idx
= slice(idx
, idx
)
2434 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2435 step
= 1 if idx
.step
is None else idx
.step
2436 if idx
.start
is None:
2437 start
= 0 if step
> 0 else len(self
) - 1
2439 start
= idx
.start
- 1 if idx
.start
>= 0 else len(self
) + idx
.start
2441 # NB: Do not call len(self) when idx == [:]
2442 if idx
.stop
is None:
2443 stop
= 0 if step
< 0 else float('inf')
2445 stop
= idx
.stop
- 1 if idx
.stop
>= 0 else len(self
) + idx
.stop
2446 stop
+= [-1, 1][step
> 0]
2448 for i
in frange(start
, stop
, step
):
2452 entry
= self
._getter
(i
)
2453 except self
.IndexError:
2454 self
.is_exhausted
= True
2461 return len(tuple(self
[:]))
2463 class IndexError(IndexError):
2467 def uppercase_escape(s
):
2468 unicode_escape
= codecs
.getdecoder('unicode_escape')
2470 r
'\\U[0-9a-fA-F]{8}',
2471 lambda m
: unicode_escape(m
.group(0))[0],
2475 def lowercase_escape(s
):
2476 unicode_escape
= codecs
.getdecoder('unicode_escape')
2478 r
'\\u[0-9a-fA-F]{4}',
2479 lambda m
: unicode_escape(m
.group(0))[0],
def parse_qs(url, **kwargs):
    """Parse the query string of *url* into a dict of value lists.

    Extra keyword arguments are forwarded to urllib.parse.parse_qs.
    """
    parsed = urllib.parse.urlparse(url)
    return urllib.parse.parse_qs(parsed.query, **kwargs)
2487 def read_batch_urls(batch_fd
):
2489 if not isinstance(url
, str):
2490 url
= url
.decode('utf-8', 'replace')
2491 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
2492 for bom
in BOM_UTF8
:
2493 if url
.startswith(bom
):
2494 url
= url
[len(bom
):]
2496 if not url
or url
.startswith(('#', ';', ']')):
2498 # "#" cannot be stripped out since it is part of the URI
2499 # However, it can be safely stripped out if following a whitespace
2500 return re
.split(r
'\s#', url
, 1)[0].rstrip()
2502 with contextlib
.closing(batch_fd
) as fd
:
2503 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data and return it as ASCII bytes suitable for a POST body."""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
2510 def update_url(url
, *, query_update
=None, **kwargs
):
2511 """Replace URL components specified by kwargs
2512 @param url str or parse url tuple
2513 @param query_update update query
2516 if isinstance(url
, str):
2517 if not kwargs
and not query_update
:
2520 url
= urllib
.parse
.urlparse(url
)
2522 assert 'query' not in kwargs
, 'query_update and query cannot be specified at the same time'
2523 kwargs
['query'] = urllib
.parse
.urlencode({
2524 **urllib
.parse
.parse_qs(url
.query
),
2527 return urllib
.parse
.urlunparse(url
._replace
(**kwargs
))
def update_url_query(url, query):
    """Merge the mapping *query* into the query string of *url* (see update_url)."""
    return update_url(url, query_update=query)
2534 def _multipart_encode_impl(data
, boundary
):
2535 content_type
= 'multipart/form-data; boundary=%s' % boundary
2538 for k
, v
in data
.items():
2539 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
2540 if isinstance(k
, str):
2542 if isinstance(v
, str):
2544 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2545 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2546 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
2547 if boundary
.encode('ascii') in content
:
2548 raise ValueError('Boundary overlaps with data')
2551 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
2553 return out
, content_type
2556 def multipart_encode(data
, boundary
=None):
2558 Encode a dict to RFC 7578-compliant form-data
2561 A dict where keys and values can be either Unicode or bytes-like
2564 If specified a Unicode object, it's used as the boundary. Otherwise
2565 a random boundary is generated.
2567 Reference: https://tools.ietf.org/html/rfc7578
2569 has_specified_boundary
= boundary
is not None
2572 if boundary
is None:
2573 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
2576 out
, content_type
= _multipart_encode_impl(data
, boundary
)
2579 if has_specified_boundary
:
2583 return out
, content_type
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Whether *x* is an instance of *allowed_types* but not of *blocked_types*.

    By default, str, bytes and mappings are blocked even though they are iterable.
    """
    blocked = (str, bytes, collections.abc.Mapping) if blocked_types is NO_DEFAULT else blocked_types
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked)
def variadic(x, allowed_types=NO_DEFAULT):
    """Return *x* itself when it is an iterable container; otherwise wrap it in a 1-tuple."""
    if not isinstance(allowed_types, (tuple, type)):
        # Passing an arbitrary iterable of types is deprecated; coerce it for compatibility
        deprecation_warning('allowed_types should be a tuple or a type')
        allowed_types = tuple(allowed_types)
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x, )
2599 def try_call(*funcs
, expected_type
=None, args
=[], kwargs
={}):
2602 val
= f(*args
, **kwargs
)
2603 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
2606 if expected_type
is None or isinstance(val
, expected_type
):
def try_get(src, getter, expected_type=None):
    """Apply one or more getters to *src*, returning the first result matching *expected_type* (see try_call)."""
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict keeping only the items for which cndn(key, value) is truthy.

    By default, items whose value is None are dropped.
    """
    result = {}
    for key, value in dct.items():
        if cndn(key, value):
            result[key] = value
    return result
2618 def merge_dicts(*dicts
):
2620 for a_dict
in dicts
:
2621 for k
, v
in a_dict
.items():
2622 if (v
is not None and k
not in merged
2623 or isinstance(v
, str) and merged
[k
] == ''):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* unchanged if it is already str; otherwise decode it using *encoding*/*errors*."""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
2641 TV_PARENTAL_GUIDELINES
= {
2651 def parse_age_limit(s
):
2652 # isinstance(False, int) is True. So type() must be used instead
2653 if type(s
) is int: # noqa: E721
2654 return s
if 0 <= s
<= 21 else None
2655 elif not isinstance(s
, str):
2657 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
2659 return int(m
.group('age'))
2662 return US_RATINGS
[s
]
2663 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
2665 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
2669 def strip_jsonp(code
):
2672 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2673 (?:\s*&&\s*(?P=func_name))?
2674 \s*\(\s*(?P<callback_data>.*)\);?
2675 \s*?(?://[^\n]*)*$''',
2676 r
'\g<callback_data>', code
)
2679 def js_to_json(code
, vars={}, *, strict
=False):
2680 # vars is a dict of var, val pairs to substitute
2681 STRING_QUOTES
= '\'"`'
2682 STRING_RE
= '|'.join(rf
'{q}(?:\\.|[^\\{q}])*{q}' for q
in STRING_QUOTES
)
2683 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
2684 SKIP_RE
= fr
'\s*(?:{COMMENT_RE})?\s*'
2686 (fr
'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
2687 (fr
'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
2690 def process_escape(match
):
2691 JSON_PASSTHROUGH_ESCAPES
= R
'"\bfnrtu'
2692 escape
= match
.group(1) or match
.group(2)
2694 return (Rf
'\{escape}' if escape
in JSON_PASSTHROUGH_ESCAPES
2695 else R
'\u00' if escape
== 'x'
2696 else '' if escape
== '\n'
2699 def template_substitute(match
):
2700 evaluated
= js_to_json(match
.group(1), vars, strict
=strict
)
2701 if evaluated
[0] == '"':
2702 return json
.loads(evaluated
)
2707 if v
in ('true', 'false', 'null'):
2709 elif v
in ('undefined', 'void 0'):
2711 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
2714 if v
[0] in STRING_QUOTES
:
2715 v
= re
.sub(r
'(?s)\${([^}]+)}', template_substitute
, v
[1:-1]) if v
[0] == '`' else v
[1:-1]
2716 escaped
= re
.sub(r
'(?s)(")|\\(.)', process_escape
, v
)
2717 return f
'"{escaped}"'
2719 for regex
, base
in INTEGER_TABLE
:
2720 im
= re
.match(regex
, v
)
2722 i
= int(im
.group(1), base
)
2723 return f
'"{i}":' if v
.endswith(':') else str(i
)
2729 except json
.JSONDecodeError
:
2730 return json
.dumps(vars[v
])
2737 raise ValueError(f
'Unknown value: {v}')
2739 def create_map(mobj
):
2740 return json
.dumps(dict(json
.loads(js_to_json(mobj
.group(1) or '[]', vars=vars))))
2742 code
= re
.sub(r
'(?:new\s+)?Array\((.*?)\)', r
'[\g<1>]', code
)
2743 code
= re
.sub(r
'new Map\((\[.*?\])?\)', create_map
, code
)
2745 code
= re
.sub(rf
'new Date\(({STRING_RE})\)', r
'\g<1>', code
)
2746 code
= re
.sub(r
'new \w+\((.*?)\)', lambda m
: json
.dumps(m
.group(0)), code
)
2747 code
= re
.sub(r
'parseInt\([^\d]+(\d+)[^\d]+\)', r
'\1', code
)
2748 code
= re
.sub(r
'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^
)]*["\'])\s*\)', r'\1', code)
2750 return re.sub(rf'''(?sx)
2752 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
2753 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
2754 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
2755 [0-9]+(?={SKIP_RE}:)|
2760 def qualities(quality_ids):
2761 """ Get a numeric quality value out of a list of possible values """
2764 return quality_ids.index(qid)
2770 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
2774 'default': '%(title)s [%(id)s].%(ext)s',
2775 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
2781 'description': 'description',
2782 'annotation': 'annotations.xml',
2783 'infojson': 'info.json',
2786 'pl_thumbnail': None,
2787 'pl_description': 'description',
2788 'pl_infojson': 'info.json',
2791 # As of [1] format syntax is:
2792 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
2793 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
2794 STR_FORMAT_RE_TMPL = r'''(?x)
2795 (?<!%)(?P<prefix>(?:%%)*)
2797 (?P<has_key>\((?P<key>{0})\))?
2799 (?P<conversion>[#0\-+ ]+)?
2801 (?P<precision>\.\d+)?
2802 (?P<len_mod>[hlL])? # unused in python
2803 {1} # conversion type
2808 STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
2811 def limit_length(s, length):
2812 """ Add ellipses to overly long strings """
2817 return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints for comparison."""
    parts = re.split(r'[-.]', v)
    return tuple(int(part) for part in parts)
2825 def is_outdated_version(version, limit, assume_new=True):
2827 return not assume_new
2829 return version_tuple(version) < version_tuple(limit)
2831 return not assume_new
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Local import — presumably avoids a circular import at module load time; confirm
    from ..update import is_non_updateable
    return not is_non_updateable()
def args_to_str(args):
    """Get a short, shell-quoted string representation for a subprocess command."""
    return ' '.join(map(compat_shlex_quote, args))
def error_to_str(err):
    """Format an exception as 'ExceptionName: message'."""
    name = type(err).__name__
    return f'{name}: {err}'
2851 def mimetype2ext(mt, default=NO_DEFAULT):
2852 if not isinstance(mt, str):
2853 if default is not NO_DEFAULT:
2870 'x-matroska': 'mkv',
2872 'x-mp4-fragmented': 'mp4',
2877 # application (streaming playlists)
2881 'vnd.apple.mpegurl': 'm3u8',
2882 'vnd.ms-sstr+xml': 'ism',
2883 'x-mpegurl': 'm3u8',
2887 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
2888 # Using .mp3 as it's the most popular one
2889 'audio/mpeg': 'mp3',
2890 'audio/webm': 'webm',
2891 'audio/x-matroska': 'mka',
2892 'audio/x-mpegurl': 'm3u',
2900 'x-realaudio': 'ra',
2911 'vnd.wap.wbmp': 'wbmp',
2918 'filmstrip+json': 'fs',
2919 'smptett+xml': 'tt',
2922 'x-ms-sami': 'sami',
2931 mimetype = mt.partition(';')[0].strip().lower()
2932 _, _, subtype = mimetype.rpartition('/')
2934 ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
2937 elif default is not NO_DEFAULT:
2939 return subtype.replace('+', '.')
2942 def ext2mimetype(ext_or_url):
2945 if '.' not in ext_or_url:
2946 ext_or_url = f'file.{ext_or_url}'
2947 return mimetypes.guess_type(ext_or_url)[0]
2950 def parse_codecs(codecs_str):
2951 # http://tools.ietf.org/html/rfc6381
2954 split_codecs = list(filter(None, map(
2955 str.strip, codecs_str.strip().strip(',').split(','))))
2956 vcodec, acodec, scodec, hdr = None, None, None, None
2957 for full_codec in split_codecs:
2958 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
2959 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
2960 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
2964 if parts[0] in ('dvh1', 'dvhe'):
2966 elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
2968 elif parts[:2] == ['vp9', '2']:
2970 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
2971 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
2972 acodec = acodec or full_codec
2973 elif parts[0] in ('stpp', 'wvtt'):
2974 scodec = scodec or full_codec
2976 write_string(f'WARNING: Unknown codec {full_codec}\n')
2977 if vcodec or acodec or scodec:
2979 'vcodec': vcodec or 'none',
2980 'acodec': acodec or 'none',
2981 'dynamic_range': hdr,
2982 **({'scodec': scodec} if scodec is not None else {}),
2984 elif len(split_codecs) == 2:
2986 'vcodec': split_codecs[0],
2987 'acodec': split_codecs[1],
2992 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
2993 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
2995 allow_mkv = not preferences or 'mkv' in preferences
2997 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
2998 return 'mkv' # TODO: any other format allows this?
3000 # TODO: All codecs supported by parse_codecs isn't handled here
3001 COMPATIBLE_CODECS = {
3003 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
3004 'h264', 'aacl', 'ec-3', # Set in ISM
3007 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3008 'vp9x', 'vp8x', # in the webm spec
3012 sanitize_codec = functools.partial(
3013 try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
3014 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3016 for ext in preferences or COMPATIBLE_CODECS.keys():
3017 codec_set = COMPATIBLE_CODECS.get(ext, set())
3018 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3022 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3025 for ext in preferences or vexts:
3026 current_exts = {ext, *vexts, *aexts}
3027 if ext == 'mkv' or current_exts == {ext} or any(
3028 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3030 return 'mkv' if allow_mkv else preferences[-1]
3033 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
3034 getheader = url_handle.headers.get
3036 cd = getheader('Content-Disposition')
3038 m = re.match(r'attachment;\s*filename="(?P
<filename
>[^
"]+)"', cd)
3040 e = determine_ext(m.group('filename
'), default_ext=None)
3044 meta_ext = getheader('x
-amz
-meta
-name
')
3046 e = meta_ext.rpartition('.')[2]
3050 return mimetype2ext(getheader('Content
-Type
'), default=default)
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI for the given bytes and MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3057 def age_restricted(content_limit, age_limit):
3058 """ Returns True iff the content should be blocked """
3060 if age_limit is None: # No limit set
3062 if content_limit is None:
3063 return False # Content available for everyone
3064 return age_limit < content_limit
3067 # List of known byte-order-marks (BOM)
3069 (b'\xef\xbb\xbf', 'utf
-8'),
3070 (b'\x00\x00\xfe\xff', 'utf
-32-be
'),
3071 (b'\xff\xfe\x00\x00', 'utf
-32-le
'),
3072 (b'\xff\xfe', 'utf
-16-le
'),
3073 (b'\xfe\xff', 'utf
-16-be
'),
3077 def is_html(first_bytes):
3078 """ Detect whether a file contains HTML by examining its first bytes. """
3081 for bom, enc in BOMS:
3082 while first_bytes.startswith(bom):
3083 encoding, first_bytes = enc, first_bytes[len(bom):]
3085 return re.match(r'^\s
*<', first_bytes.decode(encoding, 'replace
'))
3088 def determine_protocol(info_dict):
3089 protocol = info_dict.get('protocol
')
3090 if protocol is not None:
3093 url = sanitize_url(info_dict['url
'])
3094 if url.startswith('rtmp
'):
3096 elif url.startswith('mms
'):
3098 elif url.startswith('rtsp
'):
3101 ext = determine_ext(url)
3103 return 'm3u8
' if info_dict.get('is_live
') else 'm3u8_native
'
3107 return urllib.parse.urlparse(url).scheme
3110 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3111 """ Render a list of rows, each as a list of values.
3112 Text after a \t will be right aligned """
3114 return len(remove_terminal_sequences(string).replace('\t', ''))
3116 def get_max_lens(table):
3117 return [max(width(str(v)) for v in col) for col in zip(*table)]
3119 def filter_using_list(row, filterArray):
3120 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3122 max_lens = get_max_lens(data) if hide_empty else []
3123 header_row = filter_using_list(header_row, max_lens)
3124 data = [filter_using_list(row, max_lens) for row in data]
3126 table = [header_row] + data
3127 max_lens = get_max_lens(table)
3130 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3131 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3133 for pos, text in enumerate(map(str, row)):
3135 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3137 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3138 ret = '\n'.join(''.join(row).rstrip() for row in table)
3142 def _match_one(filter_part, dct, incomplete):
3143 # TODO: Generalize code with YoutubeDL._build_format_filter
3144 STRING_OPERATORS = {
3145 '*=': operator.contains,
3146 '^
=': lambda attr, value: attr.startswith(value),
3147 '$
=': lambda attr, value: attr.endswith(value),
3148 '~
=': lambda attr, value: re.search(value, attr),
3150 COMPARISON_OPERATORS = {
3152 '<=': operator.le, # "<=" must be defined above "<"
3159 if isinstance(incomplete, bool):
3160 is_incomplete = lambda _: incomplete
3162 is_incomplete = lambda k: k in incomplete
3164 operator_rex = re.compile(r'''(?x)
3166 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3168 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3171 ''' % '|
'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3172 m = operator_rex.fullmatch(filter_part.strip())
3175 unnegated_op = COMPARISON_OPERATORS[m['op
']]
3177 op = lambda attr, value: not unnegated_op(attr, value)
3180 comparison_value = m['quotedstrval
'] or m['strval
'] or m['intval
']
3182 comparison_value = comparison_value.replace(r'\
%s' % m['quote
'], m['quote
'])
3183 actual_value = dct.get(m['key
'])
3184 numeric_comparison = None
3185 if isinstance(actual_value, (int, float)):
3186 # If the original field is a string and matching comparisonvalue is
3187 # a number we should respect the origin of the original field
3188 # and process comparison value as a string (see
3189 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3191 numeric_comparison = int(comparison_value)
3193 numeric_comparison = parse_filesize(comparison_value)
3194 if numeric_comparison is None:
3195 numeric_comparison = parse_filesize(f'{comparison_value}B
')
3196 if numeric_comparison is None:
3197 numeric_comparison = parse_duration(comparison_value)
3198 if numeric_comparison is not None and m['op
'] in STRING_OPERATORS:
3199 raise ValueError('Operator
%s only supports string values
!' % m['op
'])
3200 if actual_value is None:
3201 return is_incomplete(m['key
']) or m['none_inclusive
']
3202 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3205 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3206 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3208 operator_rex = re.compile(r'''(?x)
3209 (?P<op>%s)\s*(?P<key>[a-z_]+)
3210 ''' % '|
'.join(map(re.escape, UNARY_OPERATORS.keys())))
3211 m = operator_rex.fullmatch(filter_part.strip())
3213 op = UNARY_OPERATORS[m.group('op
')]
3214 actual_value = dct.get(m.group('key
'))
3215 if is_incomplete(m.group('key
')) and actual_value is None:
3217 return op(actual_value)
3219 raise ValueError('Invalid
filter part
%r' % filter_part)
3222 def match_str(filter_str, dct, incomplete=False):
3223 """ Filter a dictionary with a simple string syntax.
3224 @returns Whether the filter passes
3225 @param incomplete Set of keys that is expected to be missing from dct.
3226 Can be True/False to indicate all/none of the keys may be missing.
3227 All conditions on incomplete keys pass if the key is missing
3230 _match_one(filter_part.replace(r'\
&', '&'), dct, incomplete)
3231 for filter_part in re.split(r'(?
<!\\)&', filter_str))
3234 def match_filter_func(filters, breaking_filters=None):
3235 if not filters and not breaking_filters:
3237 breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
3238 filters = set(variadic(filters or []))
3240 interactive = '-' in filters
3244 def _match_func(info_dict, incomplete=False):
3245 ret = breaking_filters(info_dict, incomplete)
3247 raise RejectedVideoReached(ret)
3249 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3250 return NO_DEFAULT if interactive and not incomplete else None
3252 video_title = info_dict.get('title
') or info_dict.get('id') or 'entry
'
3253 filter_str = ') |
('.join(map(str.strip, filters))
3254 return f'{video_title} does
not pass filter ({filter_str}
), skipping
..'
3258 class download_range_func:
3259 def __init__(self, chapters, ranges, from_info=False):
3260 self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
3262 def __call__(self, info_dict, ydl):
3264 warning = ('There are no chapters matching the regex
' if info_dict.get('chapters
')
3265 else 'Cannot match chapters since chapter information
is unavailable
')
3266 for regex in self.chapters or []:
3267 for i, chapter in enumerate(info_dict.get('chapters
') or []):
3268 if re.search(regex, chapter['title
']):
3270 yield {**chapter, 'index': i}
3271 if self.chapters and warning:
3272 ydl.to_screen(f'[info
] {info_dict["id"]}
: {warning}
')
3274 for start, end in self.ranges or []:
3276 'start_time
': self._handle_negative_timestamp(start, info_dict),
3277 'end_time
': self._handle_negative_timestamp(end, info_dict),
3280 if self.from_info and (info_dict.get('start_time
') or info_dict.get('end_time
')):
3282 'start_time
': info_dict.get('start_time
') or 0,
3283 'end_time
': info_dict.get('end_time
') or float('inf
'),
3285 elif not self.ranges and not self.chapters:
3289 def _handle_negative_timestamp(time, info):
3290 return max(info['duration
'] + time, 0) if info.get('duration
') and time < 0 else time
3292 def __eq__(self, other):
3293 return (isinstance(other, download_range_func)
3294 and self.chapters == other.chapters and self.ranges == other.ranges)
3297 return f'{__name__}
.{type(self).__name__}
({self.chapters}
, {self.ranges}
)'
3300 def parse_dfxp_time_expr(time_expr):
3304 mobj = re.match(rf'^
(?P
<time_offset
>{NUMBER_RE}
)s?$
', time_expr)
3306 return float(mobj.group('time_offset
'))
3308 mobj = re.match(r'^
(\d
+):(\d\d
):(\d\
d(?
:(?
:\
.|
:)\d
+)?
)$
', time_expr)
3310 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
def ass_subtitles_timecode(seconds):
    """Format a duration in seconds as an ASS timestamp (H:MM:SS.cc, centiseconds)."""
    t = timetuple_from_msec(seconds * 1000)
    # ASS uses centisecond precision, hence milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*t[:-1], t.milliseconds / 10)
def dfxp2srt(dfxp_data):
    """
    Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError when no <p> cues are found
    """
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration',
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser:
        def __init__(self):
            # FIX: these were previously *class-level* attributes. The mutable
            # lists were shared between parser instances, so a style pushed to
            # _applied_styles by one paragraph (when it produced no unclosed
            # elements, end() never popped it) leaked into later paragraphs.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # skip properties already active from the parent scope
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
    # This will not trigger false positives since only UTF-8 text is being replaced
    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Rewrite legacy namespace URIs to the current ones before parsing
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements; repeat while parent styles are still unresolved
    repeat = True
    while repeat:
        repeat = False
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val

    # The style referenced by body/div becomes the document-wide default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
def cli_option(params, command_option, param, separator=None):
    """Render `params[param]` as command-line arguments.

    Returns [] when the value is None; otherwise either
    ['opt', 'value'] or, with `separator`, ['opt<sep>value'].
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a strictly-boolean param as a CLI option via cli_option.

    `params[param]` must be True, False or None (None yields []).
    """
    value = params.get(param)
    assert value in (True, False, None)
    lookup = {True: true_value, False: false_value}
    return cli_option(lookup, command_option, value, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit the bare `command_option` when `params[param]` equals `expected_value`."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Resolve configuration args for a sequence of candidate key groups.

    `argdict` maps lower-cased keys to argument lists; the first key group
    with any non-None entry wins and its lists are flattened. A list/tuple
    `argdict` is the legacy form and is returned as-is (or dropped when
    `use_compat` is false).
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict if use_compat else []
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        candidates = [argdict.get(key.lower()) for key in variadic(key_list)]
        arg_list = [args for args in candidates if args is not None]
        if arg_list:
            return list(itertools.chain.from_iterable(arg_list))
    return default
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Build the ordered key search list for cli_configuration_args.

    Keys are rooted at `exe` (or 'main_key+exe' when they differ); when the
    bare root key is among the candidates, the (main_key, exe) pair and
    'default' are appended as fallbacks, otherwise compat mode is disabled.
    """
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in keys:
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
3541 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3600 'iw': 'heb', # Replaced by he in 1989 revision
3610 'in': 'ind', # Replaced by id in 1989 revision
3726 'ji': 'yid', # Replaced by yi in 1989 revision
def short2long(cls, code):
    """Convert language code from ISO 639-1 to ISO 639-2/T"""
    # only the first two characters are significant (e.g. 'en-US' -> 'en')
    two_letter = code[:2]
    return cls._lang_map.get(two_letter)
def long2short(cls, code):
    """Convert language code from ISO 639-2/T to ISO 639-1"""
    # reverse lookup over the short->long map; None when no entry matches
    return next(
        (short for short, long_code in cls._lang_map.items() if long_code == code),
        None)
3747 # From http://data.okfn.org/data/core/country-list
3749 'AF': 'Afghanistan',
3750 'AX': 'Åland Islands',
3753 'AS': 'American Samoa',
3758 'AG': 'Antigua and Barbuda',
3775 'BO': 'Bolivia, Plurinational State of',
3776 'BQ': 'Bonaire, Sint Eustatius and Saba',
3777 'BA': 'Bosnia and Herzegovina',
3779 'BV': 'Bouvet Island',
3781 'IO': 'British Indian Ocean Territory',
3782 'BN': 'Brunei Darussalam',
3784 'BF': 'Burkina Faso',
3790 'KY': 'Cayman Islands',
3791 'CF': 'Central African Republic',
3795 'CX': 'Christmas Island',
3796 'CC': 'Cocos (Keeling) Islands',
3800 'CD': 'Congo, the Democratic Republic of the',
3801 'CK': 'Cook Islands',
3803 'CI': 'Côte d\'Ivoire',
3808 'CZ': 'Czech Republic',
3812 'DO': 'Dominican Republic',
3815 'SV': 'El Salvador',
3816 'GQ': 'Equatorial Guinea',
3820 'FK': 'Falkland Islands (Malvinas)',
3821 'FO': 'Faroe Islands',
3825 'GF': 'French Guiana',
3826 'PF': 'French Polynesia',
3827 'TF': 'French Southern Territories',
3842 'GW': 'Guinea-Bissau',
3845 'HM': 'Heard Island and McDonald Islands',
3846 'VA': 'Holy See (Vatican City State)',
3853 'IR': 'Iran, Islamic Republic of',
3856 'IM': 'Isle of Man',
3866 'KP': 'Korea, Democratic People\'s Republic of',
3867 'KR': 'Korea, Republic of',
3870 'LA': 'Lao People\'s Democratic Republic',
3876 'LI': 'Liechtenstein',
3880 'MK': 'Macedonia, the Former Yugoslav Republic of',
3887 'MH': 'Marshall Islands',
3893 'FM': 'Micronesia, Federated States of',
3894 'MD': 'Moldova, Republic of',
3905 'NL': 'Netherlands',
3906 'NC': 'New Caledonia',
3907 'NZ': 'New Zealand',
3912 'NF': 'Norfolk Island',
3913 'MP': 'Northern Mariana Islands',
3918 'PS': 'Palestine, State of',
3920 'PG': 'Papua New Guinea',
3923 'PH': 'Philippines',
3927 'PR': 'Puerto Rico',
3931 'RU': 'Russian Federation',
3933 'BL': 'Saint Barthélemy',
3934 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
3935 'KN': 'Saint Kitts and Nevis',
3936 'LC': 'Saint Lucia',
3937 'MF': 'Saint Martin (French part)',
3938 'PM': 'Saint Pierre and Miquelon',
3939 'VC': 'Saint Vincent and the Grenadines',
3942 'ST': 'Sao Tome and Principe',
3943 'SA': 'Saudi Arabia',
3947 'SL': 'Sierra Leone',
3949 'SX': 'Sint Maarten (Dutch part)',
3952 'SB': 'Solomon Islands',
3954 'ZA': 'South Africa',
3955 'GS': 'South Georgia and the South Sandwich Islands',
3956 'SS': 'South Sudan',
3961 'SJ': 'Svalbard and Jan Mayen',
3964 'CH': 'Switzerland',
3965 'SY': 'Syrian Arab Republic',
3966 'TW': 'Taiwan, Province of China',
3968 'TZ': 'Tanzania, United Republic of',
3970 'TL': 'Timor-Leste',
3974 'TT': 'Trinidad and Tobago',
3977 'TM': 'Turkmenistan',
3978 'TC': 'Turks and Caicos Islands',
3982 'AE': 'United Arab Emirates',
3983 'GB': 'United Kingdom',
3984 'US': 'United States',
3985 'UM': 'United States Minor Outlying Islands',
3989 'VE': 'Venezuela, Bolivarian Republic of',
3991 'VG': 'Virgin Islands, British',
3992 'VI': 'Virgin Islands, U.S.',
3993 'WF': 'Wallis and Futuna',
3994 'EH': 'Western Sahara',
3998 # Not ISO 3166 codes, but used for IP blocks
3999 'AP': 'Asia/Pacific Region',
def short2full(cls, code):
    """Convert an ISO 3166-2 country code to the corresponding full name"""
    normalized = code.upper()
    return cls._country_map.get(normalized)
4010 # Major IPv4 address blocks per country
4012 'AD': '46.172.224.0/19',
4013 'AE': '94.200.0.0/13',
4014 'AF': '149.54.0.0/17',
4015 'AG': '209.59.64.0/18',
4016 'AI': '204.14.248.0/21',
4017 'AL': '46.99.0.0/16',
4018 'AM': '46.70.0.0/15',
4019 'AO': '105.168.0.0/13',
4020 'AP': '182.50.184.0/21',
4021 'AQ': '23.154.160.0/24',
4022 'AR': '181.0.0.0/12',
4023 'AS': '202.70.112.0/20',
4024 'AT': '77.116.0.0/14',
4025 'AU': '1.128.0.0/11',
4026 'AW': '181.41.0.0/18',
4027 'AX': '185.217.4.0/22',
4028 'AZ': '5.197.0.0/16',
4029 'BA': '31.176.128.0/17',
4030 'BB': '65.48.128.0/17',
4031 'BD': '114.130.0.0/16',
4033 'BF': '102.178.0.0/15',
4034 'BG': '95.42.0.0/15',
4035 'BH': '37.131.0.0/17',
4036 'BI': '154.117.192.0/18',
4037 'BJ': '137.255.0.0/16',
4038 'BL': '185.212.72.0/23',
4039 'BM': '196.12.64.0/18',
4040 'BN': '156.31.0.0/16',
4041 'BO': '161.56.0.0/16',
4042 'BQ': '161.0.80.0/20',
4043 'BR': '191.128.0.0/12',
4044 'BS': '24.51.64.0/18',
4045 'BT': '119.2.96.0/19',
4046 'BW': '168.167.0.0/16',
4047 'BY': '178.120.0.0/13',
4048 'BZ': '179.42.192.0/18',
4049 'CA': '99.224.0.0/11',
4050 'CD': '41.243.0.0/16',
4051 'CF': '197.242.176.0/21',
4052 'CG': '160.113.0.0/16',
4053 'CH': '85.0.0.0/13',
4054 'CI': '102.136.0.0/14',
4055 'CK': '202.65.32.0/19',
4056 'CL': '152.172.0.0/14',
4057 'CM': '102.244.0.0/14',
4058 'CN': '36.128.0.0/10',
4059 'CO': '181.240.0.0/12',
4060 'CR': '201.192.0.0/12',
4061 'CU': '152.206.0.0/15',
4062 'CV': '165.90.96.0/19',
4063 'CW': '190.88.128.0/17',
4064 'CY': '31.153.0.0/16',
4065 'CZ': '88.100.0.0/14',
4067 'DJ': '197.241.0.0/17',
4068 'DK': '87.48.0.0/12',
4069 'DM': '192.243.48.0/20',
4070 'DO': '152.166.0.0/15',
4071 'DZ': '41.96.0.0/12',
4072 'EC': '186.68.0.0/15',
4073 'EE': '90.190.0.0/15',
4074 'EG': '156.160.0.0/11',
4075 'ER': '196.200.96.0/20',
4076 'ES': '88.0.0.0/11',
4077 'ET': '196.188.0.0/14',
4078 'EU': '2.16.0.0/13',
4079 'FI': '91.152.0.0/13',
4080 'FJ': '144.120.0.0/16',
4081 'FK': '80.73.208.0/21',
4082 'FM': '119.252.112.0/20',
4083 'FO': '88.85.32.0/19',
4085 'GA': '41.158.0.0/15',
4087 'GD': '74.122.88.0/21',
4088 'GE': '31.146.0.0/16',
4089 'GF': '161.22.64.0/18',
4090 'GG': '62.68.160.0/19',
4091 'GH': '154.160.0.0/12',
4092 'GI': '95.164.0.0/16',
4093 'GL': '88.83.0.0/19',
4094 'GM': '160.182.0.0/15',
4095 'GN': '197.149.192.0/18',
4096 'GP': '104.250.0.0/19',
4097 'GQ': '105.235.224.0/20',
4098 'GR': '94.64.0.0/13',
4099 'GT': '168.234.0.0/16',
4100 'GU': '168.123.0.0/16',
4101 'GW': '197.214.80.0/20',
4102 'GY': '181.41.64.0/18',
4103 'HK': '113.252.0.0/14',
4104 'HN': '181.210.0.0/16',
4105 'HR': '93.136.0.0/13',
4106 'HT': '148.102.128.0/17',
4107 'HU': '84.0.0.0/14',
4108 'ID': '39.192.0.0/10',
4109 'IE': '87.32.0.0/12',
4110 'IL': '79.176.0.0/13',
4111 'IM': '5.62.80.0/20',
4112 'IN': '117.192.0.0/10',
4113 'IO': '203.83.48.0/21',
4114 'IQ': '37.236.0.0/14',
4115 'IR': '2.176.0.0/12',
4116 'IS': '82.221.0.0/16',
4117 'IT': '79.0.0.0/10',
4118 'JE': '87.244.64.0/18',
4119 'JM': '72.27.0.0/17',
4120 'JO': '176.29.0.0/16',
4121 'JP': '133.0.0.0/8',
4122 'KE': '105.48.0.0/12',
4123 'KG': '158.181.128.0/17',
4124 'KH': '36.37.128.0/17',
4125 'KI': '103.25.140.0/22',
4126 'KM': '197.255.224.0/20',
4127 'KN': '198.167.192.0/19',
4128 'KP': '175.45.176.0/22',
4129 'KR': '175.192.0.0/10',
4130 'KW': '37.36.0.0/14',
4131 'KY': '64.96.0.0/15',
4132 'KZ': '2.72.0.0/13',
4133 'LA': '115.84.64.0/18',
4134 'LB': '178.135.0.0/16',
4135 'LC': '24.92.144.0/20',
4136 'LI': '82.117.0.0/19',
4137 'LK': '112.134.0.0/15',
4138 'LR': '102.183.0.0/16',
4139 'LS': '129.232.0.0/17',
4140 'LT': '78.56.0.0/13',
4141 'LU': '188.42.0.0/16',
4142 'LV': '46.109.0.0/16',
4143 'LY': '41.252.0.0/14',
4144 'MA': '105.128.0.0/11',
4145 'MC': '88.209.64.0/18',
4146 'MD': '37.246.0.0/16',
4147 'ME': '178.175.0.0/17',
4148 'MF': '74.112.232.0/21',
4149 'MG': '154.126.0.0/17',
4150 'MH': '117.103.88.0/21',
4151 'MK': '77.28.0.0/15',
4152 'ML': '154.118.128.0/18',
4153 'MM': '37.111.0.0/17',
4154 'MN': '49.0.128.0/17',
4155 'MO': '60.246.0.0/16',
4156 'MP': '202.88.64.0/20',
4157 'MQ': '109.203.224.0/19',
4158 'MR': '41.188.64.0/18',
4159 'MS': '208.90.112.0/22',
4160 'MT': '46.11.0.0/16',
4161 'MU': '105.16.0.0/12',
4162 'MV': '27.114.128.0/18',
4163 'MW': '102.70.0.0/15',
4164 'MX': '187.192.0.0/11',
4165 'MY': '175.136.0.0/13',
4166 'MZ': '197.218.0.0/15',
4167 'NA': '41.182.0.0/16',
4168 'NC': '101.101.0.0/18',
4169 'NE': '197.214.0.0/18',
4170 'NF': '203.17.240.0/22',
4171 'NG': '105.112.0.0/12',
4172 'NI': '186.76.0.0/15',
4173 'NL': '145.96.0.0/11',
4174 'NO': '84.208.0.0/13',
4175 'NP': '36.252.0.0/15',
4176 'NR': '203.98.224.0/19',
4177 'NU': '49.156.48.0/22',
4178 'NZ': '49.224.0.0/14',
4179 'OM': '5.36.0.0/15',
4180 'PA': '186.72.0.0/15',
4181 'PE': '186.160.0.0/14',
4182 'PF': '123.50.64.0/18',
4183 'PG': '124.240.192.0/19',
4184 'PH': '49.144.0.0/13',
4185 'PK': '39.32.0.0/11',
4186 'PL': '83.0.0.0/11',
4187 'PM': '70.36.0.0/20',
4188 'PR': '66.50.0.0/16',
4189 'PS': '188.161.0.0/16',
4190 'PT': '85.240.0.0/13',
4191 'PW': '202.124.224.0/20',
4192 'PY': '181.120.0.0/14',
4193 'QA': '37.210.0.0/15',
4194 'RE': '102.35.0.0/16',
4195 'RO': '79.112.0.0/13',
4196 'RS': '93.86.0.0/15',
4197 'RU': '5.136.0.0/13',
4198 'RW': '41.186.0.0/16',
4199 'SA': '188.48.0.0/13',
4200 'SB': '202.1.160.0/19',
4201 'SC': '154.192.0.0/11',
4202 'SD': '102.120.0.0/13',
4203 'SE': '78.64.0.0/12',
4204 'SG': '8.128.0.0/10',
4205 'SI': '188.196.0.0/14',
4206 'SK': '78.98.0.0/15',
4207 'SL': '102.143.0.0/17',
4208 'SM': '89.186.32.0/19',
4209 'SN': '41.82.0.0/15',
4210 'SO': '154.115.192.0/18',
4211 'SR': '186.179.128.0/17',
4212 'SS': '105.235.208.0/21',
4213 'ST': '197.159.160.0/19',
4214 'SV': '168.243.0.0/16',
4215 'SX': '190.102.0.0/20',
4217 'SZ': '41.84.224.0/19',
4218 'TC': '65.255.48.0/20',
4219 'TD': '154.68.128.0/19',
4220 'TG': '196.168.0.0/14',
4221 'TH': '171.96.0.0/13',
4222 'TJ': '85.9.128.0/18',
4223 'TK': '27.96.24.0/21',
4224 'TL': '180.189.160.0/20',
4225 'TM': '95.85.96.0/19',
4226 'TN': '197.0.0.0/11',
4227 'TO': '175.176.144.0/21',
4228 'TR': '78.160.0.0/11',
4229 'TT': '186.44.0.0/15',
4230 'TV': '202.2.96.0/19',
4231 'TW': '120.96.0.0/11',
4232 'TZ': '156.156.0.0/14',
4233 'UA': '37.52.0.0/14',
4234 'UG': '102.80.0.0/13',
4236 'UY': '167.56.0.0/13',
4237 'UZ': '84.54.64.0/18',
4238 'VA': '212.77.0.0/19',
4239 'VC': '207.191.240.0/21',
4240 'VE': '186.88.0.0/13',
4241 'VG': '66.81.192.0/20',
4242 'VI': '146.226.0.0/16',
4243 'VN': '14.160.0.0/11',
4244 'VU': '202.80.32.0/20',
4245 'WF': '117.20.32.0/21',
4246 'WS': '202.4.32.0/19',
4247 'YE': '134.35.0.0/16',
4248 'YT': '41.242.116.0/22',
4249 'ZA': '41.0.0.0/11',
4250 'ZM': '102.144.0.0/13',
4251 'ZW': '102.177.192.0/18',
def random_ipv4(cls, code_or_block):
    """Pick a random IPv4 address from a CIDR block.

    `code_or_block` is either a 2-letter country code (resolved through
    cls._country_ip_map; None when unknown) or an explicit 'a.b.c.d/len'
    block string.
    """
    if len(code_or_block) == 2:
        cidr = cls._country_ip_map.get(code_or_block.upper())
        if not cidr:
            return None
    else:
        cidr = code_or_block
    addr, prefix_len = cidr.split('/')
    lo = int.from_bytes(socket.inet_aton(addr), 'big')
    # set all host bits to get the top of the block
    hi = lo | (0xffffffff >> int(prefix_len))
    return str(socket.inet_ntoa(random.randint(lo, hi).to_bytes(4, 'big')))
4269 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4270 # released into Public Domain
4271 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # the original bit-twiddling loop produced b'\x00' for n <= 0
    n = max(int(n), 0)
    length = max((n.bit_length() + 7) // 8, 1)
    if blocksize > 0 and length % blocksize:
        length += blocksize - length % blocksize
    return n.to_bytes(length, 'big')
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes performs the same big-endian accumulation as the
    # original zero-padded 32-bit chunked unpack loop
    return int.from_bytes(s, 'big')
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # the payload is interpreted little-endian, hence the [::-1] reversal
    payload = int(binascii.hexlify(data[::-1]), 16)
    return f'{pow(payload, exponent, modulus):x}'
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit (needs 11 bytes of overhead)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # FIX: RFC 8017 (EME-PKCS1-v1_5) requires the PS padding octets to be
    # *nonzero* — a zero octet would prematurely terminate the padding on
    # decryption. randint(0, 254) could emit zeros and never emitted 255.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def _base_n_table(n, table):
    """Return the digit table for base-n conversion.

    Falls back to 0-9a-zA-Z truncated to `n` digits; raises when neither
    argument is given or `n` exceeds the table length.
    """
    if not table and not n:
        raise ValueError('Either table or n must be specified')
    digits = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
    if n and n != len(digits):
        raise ValueError(f'base {n} exceeds table length {len(digits)}')
    return digits
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string"""
    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
def decode_base_n(string, n=None, table=None):
    """Convert given base-n string to int"""
    index_of = {char: index for index, char in enumerate(_base_n_table(n, table))}
    base = len(index_of)
    result = 0
    for char in string:
        result = result * base + index_of[char]
    return result
def decode_packed_codes(code):
    """Unpack a P.A.C.K.E.R.-style obfuscated JavaScript blob.

    Extracts the obfuscated source, base, symbol count and symbol list via
    PACKED_CODES_RE, then substitutes every word token through the rebuilt
    symbol table.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        # empty symbol entries map to their own base-n representation
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Caesar-shift every character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping); characters outside the alphabet pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    mapping = {}
    for idx, ch in enumerate(alphabet):
        # setdefault keeps the first occurrence, matching alphabet.index()
        mapping.setdefault(ch, alphabet[(idx + shift) % size])
    return ''.join(mapping.get(ch, ch) for ch in s)
def rot47(s):
    """ROT47: Caesar-shift the printable ASCII range ('!' through '~') by 47."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="quoted"') into a dict,
    stripping surrounding double quotes from quoted values."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, like JavaScript's `>>>`."""
    if val < 0:
        val += 0x100000000
    return val >> n
def write_xattr(path, key, value):
    """Set extended attribute `key` to the bytes `value` on `path`.

    Tries, in order: NTFS Alternate Data Streams on Windows,
    os.setxattr / the xattr or pyxattr modules, then the setfattr/xattr
    command-line tools.

    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no implementation is available.
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as err:
            raise XAttrMetadataError(err.errno, err.strerror)
        return

    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
    setxattr = None
    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as err:
            raise XAttrMetadataError(err.errno, err.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    value = value.decode()
    try:
        _, stderr, returncode = Popen.run(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as err:
        raise XAttrMetadataError(err.errno, err.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
def random_birthday(year_field, month_field, day_field):
    """Generate a random birthdate between 1950-01-01 and 1995-12-31 and
    return it as {year_field: 'YYYY', month_field: 'M', day_field: 'D'}
    with string values (no zero-padding)."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
def find_available_port(interface=''):
    """Ask the OS for a currently-free TCP port on `interface`.

    Returns the port number, or None when binding fails.
    """
    try:
        with socket.socket() as sock:
            sock.bind((interface, 0))  # port 0 = let the OS pick
            return sock.getsockname()[1]
    except OSError:
        return None
4502 # Templates for internet shortcut files, which are plain text files.
4503 DOT_URL_LINK_TEMPLATE
= '''\
4508 DOT_WEBLOC_LINK_TEMPLATE
= '''\
4509 <?xml version="1.0" encoding="UTF-8"?>
4510 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4511 <plist version="1.0">
4514 \t<string>%(url)s</string>
4519 DOT_DESKTOP_LINK_TEMPLATE
= '''\
4529 'url': DOT_URL_LINK_TEMPLATE
,
4530 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
4531 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """
    parts = urllib.parse.urlparse(iri)

    if '[' in parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values below list the characters that must NOT be
    # percent-encoded. Everything else but letters, digits and '_.-' is
    # percent-encoded with an underlying UTF-8 encoding; existing
    # percent-escapes are left untouched.
    netloc = ''
    if parts.username:
        netloc += urllib.parse.quote(parts.username, safe=r"!$%&'()*+,~")
        if parts.password is not None:
            netloc += ':' + urllib.parse.quote(parts.password, safe=r"!$%&'()*+,~")
        netloc += '@'

    netloc += parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    if parts.port is not None and parts.port != 80:
        netloc += ':' + str(parts.port)

    return urllib.parse.urlunparse((
        parts.scheme,
        netloc,
        urllib.parse.quote_plus(parts.path, safe=r"!$%&'()*+,/:;=@|~"),
        # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
        urllib.parse.quote_plus(parts.params, safe=r"!$%&'()*+,/:;=@|~"),
        # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
        urllib.parse.quote_plus(parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
        urllib.parse.quote_plus(parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """On Windows/Cygwin, prefix the absolute path with \\\\?\\ to lift the
    MAX_PATH limitation; on other platforms return the path unchanged.
    The maximum allowed length for individual path segments may still be limited."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """Traverse `field` in `obj` and render the value through `func` and
    `template`; return `default` when the value is falsy (or, when `ignore`
    is given, when it is one of the ignored values)."""
    val = traversal.traverse_obj(obj, *variadic(field))
    if ignore is NO_DEFAULT:
        skip = not val
    else:
        skip = val in variadic(ignore)
    if skip:
        return default
    return template % func(val)
def clean_podcast_url(url):
    """Strip known podcast analytics/tracking redirect prefixes from `url`."""
    url = re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com|
                chrt\.fm/track|
                mgln\.ai/e
            )(?:/[^/.]+)?|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e|
            [0-9]\.gum\.fm|
            pscrb\.fm/rss/p
        )/''', '', url)
    # collapse a doubled scheme left behind by prefix removal
    return re.sub(r'^\w+://(\w+://)', r'\1', url)
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random UUIDv4-shaped string.

    NB: every 'x'/'y' placeholder gets a fully random hex nibble, so the
    RFC 4122 variant bits are not guaranteed — only the version nibble ('4').
    """
    return re.sub(r'[xy]', lambda _: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` (like `mkdir -p` on the dirname).

    Returns True on success (or when `path` has no directory component),
    False when creation fails; on failure the error is reported through
    `to_screen` when it is callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # FIX: was `if callable(to_screen) is not None:` — `callable()`
        # returns a bool, so that test was always True and calling
        # to_screen(None) crashed with TypeError on the error path.
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
def get_executable_path():
    """Return the directory of the path reported by the updater's
    _get_variant_and_executable_path() (second tuple element), absolutized."""
    from ..update import _get_variant_and_executable_path

    return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
def get_user_config_dirs(package_name):
    """Yield per-user configuration directories for *package_name*."""
    # .config (e.g. ~/.config/package_name)
    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    yield os.path.join(xdg_config_home, package_name)

    # appdata (%APPDATA%/package_name)
    appdata_dir = os.getenv('appdata')
    # Fix: guard against the env var being unset — os.path.join(None, ...) raises TypeError
    if appdata_dir:
        yield os.path.join(appdata_dir, package_name)

    # home (~/.package_name)
    yield os.path.join(compat_expanduser('~'), f'.{package_name}')
def get_system_config_dirs(package_name):
    """Yield system-wide configuration directories for *package_name*."""
    # /etc/package_name
    yield os.path.join('/etc', package_name)
def time_seconds(**kwargs):
    """
    Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
    """
    offset = datetime.timedelta(**kwargs).total_seconds()
    return offset + time.time()
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Build an HS256-signed JWT (bytes) from *payload_data*, signing with *key*.

    *headers* may supply extra/overriding JOSE header fields.
    """
    # Fix: header_data was referenced without ever being defined, and the token
    # was never returned; also avoid a mutable default argument for headers
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
    return token
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode and return the payload of *jwt* WITHOUT verifying its signature."""
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
    # Fix: the decoded payload was computed but never returned
    return payload_data
# Tri-state flag: None on non-Windows; False on Windows until
# windows_enable_vt_mode() succeeds and sets it to True.
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
@functools.cache
def supports_terminal_sequences(stream):
    """Whether ANSI escape sequences can safely be written to *stream*.

    Cached; windows_enable_vt_mode() clears the cache after flipping
    WINDOWS_VT_MODE (see its call to .cache_clear()).
    """
    if compat_os_name == 'nt':
        if not WINDOWS_VT_MODE:
            return False
    elif not os.getenv('TERM'):
        return False
    try:
        return stream.isatty()
    except BaseException:
        # stream may be a detached/closed or exotic file object; treat as unsupported
        return False
def windows_enable_vt_mode():
    """Ref: https://bugs.python.org/issue30075 """
    if get_windows_version() < (10, 0, 10586):
        # VT processing is only available from this Windows 10 build onwards
        return

    import ctypes
    import ctypes.wintypes
    import msvcrt

    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

    dll = ctypes.WinDLL('kernel32', use_last_error=False)
    handle = os.open('CONOUT$', os.O_RDWR)
    try:
        h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
        dw_original_mode = ctypes.wintypes.DWORD()
        success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
        if not success:
            raise Exception('GetConsoleMode failed')

        success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
            dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
        if not success:
            raise Exception('SetConsoleMode failed')
    finally:
        # Fix: always release the console handle, even when the mode calls fail
        os.close(handle)

    global WINDOWS_VT_MODE
    WINDOWS_VT_MODE = True
    supports_terminal_sequences.cache_clear()
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return *string* with ANSI SGR escape sequences stripped out."""
    cleaned = _terminal_sequences_re.sub('', string)
    return cleaned
def number_of_digits(number):
    """Length of the %d rendering of *number* (a leading '-' counts as a digit slot)."""
    rendered = '%d' % number
    return len(rendered)
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the string form of every truthy value with *delim*.

    If *from_dict* is given, each value is first treated as a traversal path
    into that dict.
    """
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
    parts = [str(v) for v in values if v]
    return delim.join(parts)
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    This function is useful with video services that scale the provided thumbnails on demand
    """
    _keys = ('width', 'height')
    max_dimensions = max(
        (tuple(format.get(k) or 0 for k in _keys) for format in formats),
        default=(0, 0))  # Fix: max() over an empty formats list needs a default
    if not max_dimensions[0]:
        # No format declares a width — nothing to scale to
        return thumbnails
    return [
        merge_dicts(
            {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
            dict(zip(_keys, max_dimensions)), thumbnail)
        for thumbnail in thumbnails
    ]
def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
    # Fix: both (None, None, None) returns must be guarded; previously the
    # first return was unconditional, making the parse unreachable
    if range is None:
        return None, None, None
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if not crg:
        return None, None, None
    return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
def read_stdin(what):
    """Read everything from STDIN, printing a prompt describing *what* is expected."""
    if what:
        eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
        write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    # Fix: the collected input was never returned
    return sys.stdin.read()
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @returns (encoding, bytes to skip)
    """
    # BOM marks are given priority over declarations
    for bom, enc in BOMS:
        if data.startswith(bom):
            return enc, len(bom)

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    stripped = data.replace(b'\0', b'')
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if declaration:
        return declaration.group(1).decode(), 0
    return None, 0
# Set to True once load_configs() has run; init() asserts it is still False
# so a Config instance cannot be initialized twice.
__initialized = False
def __init__(self, parser, label=None):
    """Bind the option *parser* and an optional human-readable *label*."""
    self.parser = parser
    self.label = label
    self._loaded_paths = set()
    self.configs = []
def init(self, args=None, filename=None):
    """Record own args/filename, then parse this config and any referenced ones."""
    assert not self.__initialized
    self.own_args = args
    self.filename = filename
    return self.load_configs()
def load_configs(self):
    """Parse own args and recursively load any --config-locations they name.

    Returns False when this config file was already loaded (cycle guard),
    True otherwise.
    """
    directory = ''
    if self.filename:
        location = os.path.realpath(self.filename)
        directory = os.path.dirname(location)
        # Fix: skip files that were already loaded instead of recursing forever
        if location in self._loaded_paths:
            return False
        self._loaded_paths.add(location)

    self.__initialized = True
    opts, _ = self.parser.parse_known_args(self.own_args)
    self.parsed_args = self.own_args
    for location in opts.config_locations or []:
        if location == '-':
            # '-' means read extra options from STDIN, at most once
            if location in self._loaded_paths:
                continue
            self._loaded_paths.add(location)
            self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
            continue
        location = os.path.join(directory, expand_path(location))
        if os.path.isdir(location):
            location = os.path.join(location, 'yt-dlp.conf')
        if not os.path.exists(location):
            self.parser.error(f'config location {location} does not exist')
        self.append_config(self.read_file(location), location)
    return True
def __str__(self):
    """Render this config (and nested configs) with credentials scrubbed."""
    label = join_nonempty(
        self.label, 'config', f'"{self.filename}"' if self.filename else '',
        delim=' ')
    return join_nonempty(
        self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
        *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
        delim='\n')
def read_file(filename, default=[]):
    """Read *filename* and shlex-split its contents; *default* if it is missing."""
    try:
        optionf = open(filename, 'rb')
    except OSError:
        return default  # silently skip if file is not present
    try:
        enc, skip = determine_file_encoding(optionf.read(512))
        optionf.seek(skip, io.SEEK_SET)
    except OSError:
        enc = None  # silently skip read errors
    try:
        # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
        contents = optionf.read().decode(enc or preferredencoding())
        res = shlex.split(contents, comments=True)
    except Exception as err:
        # Fix: name the offending file in the error instead of a placeholder
        raise ValueError(f'Unable to parse "{filename}": {err}')
    finally:
        # Fix: always close the file handle, including on parse errors
        optionf.close()
    return res
def hide_login_info(opts):
    """Return a copy of *opts* with the values of credential options replaced by 'PRIVATE'."""
    PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
    eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

    def _scrub_eq(o):
        # Scrub the '--opt=value' form
        m = eqre.match(o)
        if m:
            return m.group('key') + '=PRIVATE'
        else:
            return o

    opts = list(map(_scrub_eq, opts))
    # Scrub the '--opt value' form: blank out the argument following a private opt
    for idx, opt in enumerate(opts):
        if opt in PRIVATE_OPTS and idx + 1 < len(opts):
            opts[idx + 1] = 'PRIVATE'
    return opts
def append_config(self, *args, label=None):
    """Create a child config sharing the loaded-path set; keep it if it loads."""
    child = type(self)(self.parser, label)
    child._loaded_paths = self._loaded_paths
    if child.init(*args):
        self.configs.append(child)
@property
def all_args(self):
    """Flatten the arguments of all appended configs followed by this one's own."""
    for config in reversed(self.configs):
        yield from config.all_args
    yield from self.parsed_args or []
def parse_known_args(self, **kwargs):
    """Delegate to the underlying parser using the accumulated arguments."""
    accumulated = self.all_args
    return self.parser.parse_known_args(accumulated, **kwargs)
def parse_args(self):
    """Parse the accumulated arguments with the underlying parser."""
    accumulated = self.all_args
    return self.parser.parse_args(accumulated)
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    merged = {}
    for headers in dicts:
        for key, value in dict.items(headers):
            merged[key.title()] = value
    return merged
def cached_method(f):
    """Cache a method"""
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        bound_args = signature.bind(self, *args, **kwargs)
        bound_args.apply_defaults()
        # Cache key: every argument except `self`, in declaration order
        key = tuple(bound_args.arguments.values())[1:]

        # Per-instance cache, bucketed by method name
        cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
        if key not in cache:
            cache[key] = f(self, *args, **kwargs)
        # Fix: the cached value was never returned, and neither was the wrapper
        return cache[key]
    return wrapper
class classproperty:
    """property access for class methods with optional caching"""
    def __new__(cls, func=None, *args, **kwargs):
        # Fix: support both bare `@classproperty` and parametrized
        # `@classproperty(cache=True)` — the latter returns a partial that
        # receives the function on the second call
        if func is None:
            return functools.partial(cls, *args, **kwargs)
        return super().__new__(cls)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func  # Fix: the wrapped function was never stored
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        if self._cache is None:
            return self.func(cls)
        elif cls not in self._cache:
            self._cache[cls] = self.func(cls)
        return self._cache[cls]
class function_with_repr:
    """Wrap a callable so that repr() yields a custom or qualified name."""
    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func, self.__repr = func, repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    def __repr__(self):
        # Use the explicit repr when one was supplied, else the dotted path
        if self.__repr:
            return self.__repr
        return f'{self.func.__module__}.{self.func.__qualname__}'
class Namespace(types.SimpleNamespace):
    """Immutable namespace"""

    def __iter__(self):
        # Iterating a Namespace yields its attribute values
        return iter(self.__dict__.values())

    @property
    def items_(self):
        return self.__dict__.items()
# Known media file extensions, grouped by kind
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)  # Fix: the Namespace(...) call was missing its closing parenthesis
# The "common" extensions are part of the full video/audio sets
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5010 for retry in RetryManager(...):
5013 except SomeException as err:
# Class-level defaults: attempt counter and last captured error; __iter__
# increments `attempt` and uses NO_DEFAULT as the "no error yet" sentinel.
attempt, _error = 0, None
def __init__(self, _retries, _error_callback, **kwargs):
    """Store the retry budget and pre-bind *_error_callback* with **kwargs."""
    self.retries = _retries if _retries else 0
    self.error_callback = functools.partial(_error_callback, **kwargs)
def _should_retry(self):
    """Keep retrying while an error was recorded and attempts remain."""
    had_error = self._error is not NO_DEFAULT
    return had_error and self.attempt <= self.retries
@property
def error(self):
    """The error recorded for the current attempt, or None if none was set."""
    if self._error is NO_DEFAULT:
        return None
    return self._error

@error.setter
def error(self, value):
    self._error = value

def __iter__(self):
    while self._should_retry():
        self.error = NO_DEFAULT  # sentinel: attempt started, no error captured yet
        self.attempt += 1
        yield self
        if self._error:
            self.error_callback(self.error, self.attempt, self.retries)
def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
    """Utility function for reporting retries"""
    if count > retries:
        # Retry budget exhausted: report via `error` if provided, else re-raise
        if error:
            return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
        raise e

    if not count:
        return warn(e)
    elif isinstance(e, ExtractorError):
        e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
    warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')

    # sleep_func may be a callable taking the retry number, or a plain number
    delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
    if delay:
        info(f'Sleeping {delay:.2f} seconds ...')
        time.sleep(delay)
def make_archive_id(ie, video_id):
    """Build the download-archive entry for *video_id* extracted by *ie*."""
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
def truncate_string(s, left, right=0):
    """Shorten *s* to at most *left* + *right* chars, eliding the middle with '...'."""
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        # Fix: short (or absent) strings must be returned unchanged
        return s
    return f'{s[:left-3]}...{s[-right:] if right else ""}'
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Resolve *options* (with '-' prefixes meaning removal, plus aliases) into an ordered set."""
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for val in options:
        discard = val.startswith('-')
        if discard:
            val = val[1:]

        if val in alias_dict:
            # A discarded alias negates each of its members
            val = alias_dict[val] if not discard else [
                i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(val, alias_dict, start=requested)
            continue

        current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
                   else [val] if val in alias_dict['all'] else None)
        if current is None:
            raise ValueError(val)

        if discard:
            for item in current:
                while item in requested:
                    requested.remove(item)
        else:
            requested.extend(current)

    return orderedSet(requested)
# Grammar for a single sort token: optional '+' (reverse), field name,
# optional ':'/'~' separator with a limit value
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
           'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
           'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases

ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                'height', 'width', 'proto', 'vext', 'abr', 'aext',
                'fps', 'fs_approx', 'source', 'id')

settings = {
    'vcodec': {'type': 'ordered', 'regex': True,
               'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
    'acodec': {'type': 'ordered', 'regex': True,
               'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
    'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
            'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
    'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
              'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
    'vext': {'type': 'ordered', 'field': 'video_ext',
             'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
             'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
    'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
             'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
             'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
    'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
    'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                   'field': ('vcodec', 'acodec'),
                   'function': lambda it: int(any(v != 'none' for v in it))},
    'ie_pref': {'priority': True, 'type': 'extractor'},
    'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
    'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
    'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
    'quality': {'convert': 'float', 'default': -1},
    'filesize': {'convert': 'bytes'},
    'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
    'id': {'convert': 'string', 'field': 'format_id'},
    'height': {'convert': 'float_none'},
    'width': {'convert': 'float_none'},
    'fps': {'convert': 'float_none'},
    'channels': {'convert': 'float_none', 'field': 'audio_channels'},
    'tbr': {'convert': 'float_none'},
    'vbr': {'convert': 'float_none'},
    'abr': {'convert': 'float_none'},
    'asr': {'convert': 'float_none'},
    'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},

    'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
    'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
           'function': lambda it: next(filter(None, it), None)},
    'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
             'function': lambda it: next(filter(None, it), None)},
    'ext': {'type': 'combined', 'field': ('vext', 'aext')},
    'res': {'type': 'multiple', 'field': ('height', 'width'),
            'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},

    # Actual field names
    'format_id': {'type': 'alias', 'field': 'id'},
    'preference': {'type': 'alias', 'field': 'ie_pref'},
    'language_preference': {'type': 'alias', 'field': 'lang'},
    'source_preference': {'type': 'alias', 'field': 'source'},
    'protocol': {'type': 'alias', 'field': 'proto'},
    'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
    'audio_channels': {'type': 'alias', 'field': 'channels'},

    # Deprecated aliases
    'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
    'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
    'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
    'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
    'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
    'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
    'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
    'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
    'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
    'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
    'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
    'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
    'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
    'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
    'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
    'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
    'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
    'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
    'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
    'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
}  # Fix: the settings dict was missing its `settings = {` opener and closing brace
def __init__(self, ydl, field_preference):
    """Bind the YoutubeDL instance and evaluate the requested sort order."""
    # Fix: the instance state was never initialized before use
    self.ydl = ydl
    self._order = []
    self.evaluate_params(self.ydl.params, field_preference)
    if ydl.params.get('verbose'):
        self.print_verbose_info(self.ydl.write_debug)
def _get_field_setting(self, field, key):
    """Look up *key* for *field* in self.settings, materializing defaults lazily."""
    if field not in self.settings:
        if key in ('forced', 'priority'):
            # Fix: unknown fields are never forced/prioritized
            return False
        self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
                                    'deprecated and may be removed in a future version')
        self.settings[field] = {}
    propObj = self.settings[field]
    if key not in propObj:
        type = propObj.get('type')
        if key == 'field':
            default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
        elif key == 'convert':
            default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
        else:
            default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
        propObj[key] = default
    return propObj[key]
def _resolve_field_value(self, field, value, convertNone=False):
    """Convert a raw field *value* per the field's configured conversion."""
    if value is None:
        if not convertNone:
            return None
    else:
        value = value.lower()
    conversion = self._get_field_setting(field, 'convert')
    if conversion == 'ignore':
        return None
    if conversion == 'string':
        return value
    elif conversion == 'float_none':
        return float_or_none(value)
    elif conversion == 'bytes':
        return parse_bytes(value)
    elif conversion == 'order':
        order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
        use_regex = self._get_field_setting(field, 'regex')
        list_length = len(order_list)
        empty_pos = order_list.index('') if '' in order_list else list_length + 1
        if use_regex and value is not None:
            for i, regex in enumerate(order_list):
                if regex and re.match(regex, value):
                    return list_length - i
            return list_length - empty_pos  # not in list
        else:  # not regex or value = None
            return list_length - (order_list.index(value) if value in order_list else empty_pos)
    else:
        # 'float_string': numbers compare numerically; anything else demotes
        # the field to string comparison from now on
        if value.isnumeric():
            return float(value)
        else:
            self.settings[field]['convert'] = 'string'
            return value
def evaluate_params(self, params, sort_extractor):
    """Parse user/extractor/default sort strings into self._order and per-field settings."""
    self._use_free_order = params.get('prefer_free_formats', False)
    self._sort_user = params.get('format_sort', [])
    self._sort_extractor = sort_extractor

    def add_item(field, reverse, closest, limit_text):
        # Register one concrete sort field; first occurrence wins
        field = field.lower()
        if field in self._order:
            return
        self._order.append(field)
        limit = self._resolve_field_value(field, limit_text)
        data = {
            'reverse': reverse,
            'closest': False if limit is None else closest,
            'limit_text': limit_text,
            'limit': limit}
        if field in self.settings:
            self.settings[field].update(data)
        else:
            self.settings[field] = data

    sort_list = (
        tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
        + (tuple() if params.get('format_sort_force', False)
           else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
        + tuple(self._sort_user) + tuple(sort_extractor) + self.default)

    for item in sort_list:
        match = re.match(self.regex, item)
        if match is None:
            raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
        field = match.group('field')
        if field is None:
            continue
        if self._get_field_setting(field, 'type') == 'alias':
            alias, field = field, self._get_field_setting(field, 'field')
            if self._get_field_setting(alias, 'deprecated'):
                self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
                                            f'be removed in a future version. Please use {field} instead')
        reverse = match.group('reverse') is not None
        closest = match.group('separator') == '~'
        limit_text = match.group('limit')

        has_limit = limit_text is not None
        has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
        has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')

        fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
        limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
        limit_count = len(limits)
        for (i, f) in enumerate(fields):
            add_item(f, reverse, closest,
                     limits[i] if i < limit_count
                     else limits[0] if has_limit and not has_multiple_limits
                     else None)
def print_verbose_info(self, write_debug):
    """Emit the effective sort order (user, extractor, resolved) via *write_debug*."""
    if self._sort_user:
        write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
    if self._sort_extractor:
        write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
    write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
        '+' if self._get_field_setting(field, 'reverse') else '', field,
        '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
                      self._get_field_setting(field, 'limit_text'),
                      self._get_field_setting(field, 'limit'))
        if self._get_field_setting(field, 'limit_text') is not None else '')
        for field in self._order if self._get_field_setting(field, 'visible')]))
def _calculate_field_preference_from_value(self, format, field, type, value):
    """Map one field's *value* to a sortable tuple, honoring reverse/closest/limit."""
    reverse = self._get_field_setting(field, 'reverse')
    closest = self._get_field_setting(field, 'closest')
    limit = self._get_field_setting(field, 'limit')

    if type == 'extractor':
        maximum = self._get_field_setting(field, 'max')
        if value is None or (maximum is not None and value >= maximum):
            value = -1
    elif type == 'boolean':
        in_list = self._get_field_setting(field, 'in_list')
        not_in_list = self._get_field_setting(field, 'not_in_list')
        value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
    elif type == 'ordered':
        value = self._resolve_field_value(field, value, True)

    # try to convert to number
    val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
    is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
    if is_num:
        value = val_num

    return ((-10, 0) if value is None
            else (1, value, 0) if not is_num  # if a field has mixed strings and numbers, strings are sorted higher
            else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
            else (0, value, 0) if not reverse and (limit is None or value <= limit)
            else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
            else (-1, value, 0))
def _calculate_field_preference(self, format, field):
    """Compute the sortable tuple for *field*, combining sub-fields when needed."""
    type = self._get_field_setting(field, 'type')  # extractor, boolean, ordered, field, multiple
    get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
    if type == 'multiple':
        type = 'field'  # Only 'field' is allowed in multiple for now
        actual_fields = self._get_field_setting(field, 'field')

        value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
    else:
        # Fix: single-field case must read the field's own value
        value = get_value(field)
    return self._calculate_field_preference_from_value(format, field, type, value)
def calculate_preference(self, format):
    """Fill in derivable format fields, then return the full sort key tuple."""
    # Determine missing protocol
    if not format.get('protocol'):
        format['protocol'] = determine_protocol(format)

    # Determine missing ext
    if not format.get('ext') and 'url' in format:
        format['ext'] = determine_ext(format['url'])
    if format.get('vcodec') == 'none':
        format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
        format['video_ext'] = 'none'
    else:
        format['video_ext'] = format['ext']
        format['audio_ext'] = 'none'
    # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'):  # Not supported?
    #    format['preference'] = -1000

    if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
        # HEVC-over-FLV is out-of-spec by FLV's original spec
        # ref. https://trac.ffmpeg.org/ticket/6389
        # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
        format['preference'] = -100

    # Determine missing bitrates
    if format.get('vcodec') == 'none':
        format['vbr'] = 0
    if format.get('acodec') == 'none':
        format['abr'] = 0
    if not format.get('vbr') and format.get('vcodec') != 'none':
        format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
    if not format.get('abr') and format.get('acodec') != 'none':
        format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
    if not format.get('tbr'):
        format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None

    return tuple(self._calculate_field_preference(format, field) for field in self._order)
def __init__(self, ydl=None):
    # ydl may be None, in which case every logging method is a no-op
    self._ydl = ydl
def debug(self, message):
    """Forward *message* to the bound ydl's write_debug, if any."""
    # Fix: guard against the default ydl=None
    if self._ydl:
        self._ydl.write_debug(message)
def info(self, message):
    """Forward *message* to the bound ydl's to_screen, if any."""
    # Fix: guard against the default ydl=None
    if self._ydl:
        self._ydl.to_screen(message)
def warning(self, message, *, once=False):
    """Forward *message* to the bound ydl's report_warning, if any."""
    # Fix: guard against the default ydl=None
    if self._ydl:
        self._ydl.report_warning(message, once)
def error(self, message, *, is_error=True):
    """Forward *message* to the bound ydl's report_error, if any."""
    # Fix: guard against the default ydl=None
    if self._ydl:
        self._ydl.report_error(message, is_error=is_error)
def stdout(self, message):
    """Forward *message* to the bound ydl's to_stdout, if any."""
    # Fix: guard against the default ydl=None
    if self._ydl:
        self._ydl.to_stdout(message)
def stderr(self, message):
    """Forward *message* to the bound ydl's to_stderr, if any."""
    # Fix: guard against the default ydl=None
    if self._ydl:
        self._ydl.to_stderr(message)