43 import xml
.etree
.ElementTree
45 from . import traversal
47 from ..compat
import functools
# isort: split
48 from ..compat
import (
49 compat_etree_fromstring
,
51 compat_HTMLParseError
,
55 from ..dependencies
import xattr
57 __name__
= __name__
.rsplit('.', 1)[0] # Pretend to be the parent module
59 # This is not clearly defined otherwise
60 compiled_regex_type
= type(re
.compile(''))
# English month names, January first (index 0) through December (index 11)
ENGLISH_MONTH_NAMES = (
    'January February March April May June '
    'July August September October November December'
).split()
76 'en': ENGLISH_MONTH_NAMES
,
78 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
79 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
80 # these follow the genitive grammatical case (dopełniacz)
81 # some websites might be using nominative, which will require another month list
82 # https://en.wikibooks.org/wiki/Polish/Noun_cases
83 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
84 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
87 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
89 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
90 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
91 'EST': -5, 'EDT': -4, # Eastern
92 'CST': -6, 'CDT': -5, # Central
93 'MST': -7, 'MDT': -6, # Mountain
94 'PST': -8, 'PDT': -7 # Pacific
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; the replacement
# may be more than one character long (e.g. 'Æ' -> 'AE', 'ß' -> 'ss').
ACCENT_CHARS = {
    accented: plain
    for accented, plain in zip(
        'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
        itertools.chain(
            'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
            'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))
}
130 '%Y-%m-%d %H:%M:%S.%f',
131 '%Y-%m-%d %H:%M:%S:%f',
134 '%Y-%m-%dT%H:%M:%SZ',
135 '%Y-%m-%dT%H:%M:%S.%fZ',
136 '%Y-%m-%dT%H:%M:%S.%f0Z',
138 '%Y-%m-%dT%H:%M:%S.%f',
141 '%b %d %Y at %H:%M:%S',
143 '%B %d %Y at %H:%M:%S',
147 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
148 DATE_FORMATS_DAY_FIRST
.extend([
159 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
160 DATE_FORMATS_MONTH_FIRST
.extend([
168 PACKED_CODES_RE
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
169 JSON_LD_RE
= r
'(?is)<script[^>]+type=(["\']?
)application
/ld\
+json\
1[^
>]*>\s
*(?P
<json_ld
>{.+?}|\
[.+?\
])\s
*</script
>'
171 NUMBER_RE = r'\d
+(?
:\
.\d
+)?
'
175 def preferredencoding():
176 """Get preferred encoding.
178 Returns the best encoding scheme for the system, based on
179 locale.getpreferredencoding() and some further tweaks.
182 pref = locale.getpreferredencoding()
190 def write_json_file(obj, fn):
191 """ Encode obj as JSON and write it to fn, atomically if possible """
193 tf = tempfile.NamedTemporaryFile(
194 prefix=f'{os.path.basename(fn)}
.', dir=os.path.dirname(fn),
195 suffix='.tmp
', delete=False, mode='w
', encoding='utf
-8')
199 json.dump(obj, tf, ensure_ascii=False)
200 if sys.platform == 'win32
':
201 # Need to remove existing file on Windows, else os.rename raises
202 # WindowsError or FileExistsError.
203 with contextlib.suppress(OSError):
205 with contextlib.suppress(OSError):
208 os.chmod(tf.name, 0o666 & ~mask)
209 os.rename(tf.name, fn)
211 with contextlib.suppress(OSError):
def find_xpath_attr(node, xpath, key, val=None):
    """Find the first element matching ``xpath[@key]`` or ``xpath[@key=val]``.

    @param node   Element (or tree) to search from; must provide find()/findall()
    @param xpath  Path expression selecting the candidate elements
    @param key    Attribute name; restricted to letters, '_' and '-'
    @param val    If None, only the presence of the attribute is required;
                  otherwise its value must equal the string form of val
    @returns      The first matching element, or None if there is none
    """
    # Kept as an assertion for backward compatibility with existing callers
    assert re.match(r'^[a-zA-Z_-]+$', key)
    if val is None:
        return node.find(xpath + '[@%s]' % key)

    text = f'{val}'
    # The predicate syntax has no escape mechanism for quotes, so choose a
    # delimiter that does not occur in the value
    if "'" not in text:
        return node.find(xpath + f"[@{key}='{text}']")
    if '"' not in text:
        return node.find(xpath + f'[@{key}="{text}"]')
    # Value contains both quote kinds and cannot be written as a literal:
    # select by attribute presence and compare the value manually
    return next(
        (element for element in node.findall(xpath + f'[@{key}]') if element.get(key) == text),
        None)
222 # On python2.6 the xml.etree.ElementTree.Element methods don't support
223 # the namespace parameter
226 def xpath_with_ns(path
, ns_map
):
227 components
= [c
.split(':') for c
in path
.split('/')]
231 replaced
.append(c
[0])
234 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
235 return '/'.join(replaced
)
238 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
239 def _find_xpath(xpath
):
240 return node
.find(xpath
)
242 if isinstance(xpath
, str):
243 n
= _find_xpath(xpath
)
251 if default
is not NO_DEFAULT
:
254 name
= xpath
if name
is None else name
255 raise ExtractorError('Could not find XML element %s' % name
)
261 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
262 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
263 if n
is None or n
== default
:
266 if default
is not NO_DEFAULT
:
269 name
= xpath
if name
is None else name
270 raise ExtractorError('Could not find XML element\'s text %s' % name
)
276 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
277 n
= find_xpath_attr(node
, xpath
, key
)
279 if default
is not NO_DEFAULT
:
282 name
= f
'{xpath}[@{key}]' if name
is None else name
283 raise ExtractorError('Could not find XML attribute %s' % name
)
def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag whose ``id`` attribute equals the given ID.

    Delegates to get_element_by_attribute with the attribute name 'id';
    any extra keyword arguments are forwarded unchanged.
    """
    element_id = id
    return get_element_by_attribute('id', element_id, html, **kwargs)
def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag whose ``id`` attribute equals the given ID.

    Delegates to get_element_html_by_attribute with the attribute name 'id';
    any extra keyword arguments are forwarded unchanged.
    """
    element_id = id
    return get_element_html_by_attribute('id', element_id, html, **kwargs)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the given class, or None if no tag matches"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the given class, or None if no tag matches"""
    matches = get_elements_html_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag whose attribute matches value, or None.

    Extra keyword arguments are forwarded to get_elements_by_attribute.
    """
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    if not matches:
        return None
    return matches[0]
def get_element_html_by_attribute(attribute, value, html, **kwargs):
    """Return the html of the first tag whose attribute matches value, or None.

    Extra keyword arguments are forwarded to get_elements_html_by_attribute.
    """
    matches = get_elements_html_by_attribute(attribute, value, html, **kwargs)
    if not matches:
        return None
    return matches[0]
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list.

    NOTE: extra keyword arguments are accepted but are not forwarded.
    """
    # The class must appear as a whole word inside the attribute value:
    # bounded on each side by a quote or whitespace
    class_value_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    # The value is already a regex, so it must not be escaped again
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # The class must appear as a whole word inside the attribute value:
    # bounded on each side by a quote or whitespace
    class_value_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    # The value is already a regex, so it must not be escaped again
    return get_elements_html_by_attribute('class', class_value_re, html, escape_value=False)
def get_elements_by_attribute(*args, **kwargs):
    """Return the content of every tag with the specified attribute in the passed HTML document, as a list.

    All arguments are passed through to get_elements_text_and_html_by_attribute.
    """
    contents = []
    for content, _whole in get_elements_text_and_html_by_attribute(*args, **kwargs):
        contents.append(content)
    return contents
def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of every tag with the specified attribute in the passed HTML document, as a list.

    All arguments are passed through to get_elements_text_and_html_by_attribute.
    """
    wholes = []
    for _content, whole in get_elements_text_and_html_by_attribute(*args, **kwargs):
        wholes.append(whole)
    return wholes
345 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w
:.-]+', escape_value=True):
347 Return the text (content) and the html (whole) of the tag with the specified
348 attribute in the passed HTML document
353 quote = '' if re.match(r'''[\s"'`
=<>]''', value) else '?'
355 value = re.escape(value) if escape_value else value
357 partial_element_re = rf'''(?x
)
359 (?
:\
s(?
:[^
>"']|"[^
"]*"|
'[^']*')*)?
360 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
363 for m in re.finditer(partial_element_re, html):
364 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
367 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P
<content
>.*)(?P
=q
)$
', r'\g
<content
>', content, flags=re.DOTALL)),
372 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
374 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
375 closing tag for the first opening tag it has encountered, and can be used
379 class HTMLBreakOnClosingTagException(Exception):
383 self.tagstack = collections.deque()
384 html.parser.HTMLParser.__init__(self)
389 def __exit__(self, *_):
393 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
394 # so data remains buffered; we no longer have any interest in it, thus
395 # override this method to discard it
398 def handle_starttag(self, tag, _):
399 self.tagstack.append(tag)
401 def handle_endtag(self, tag):
402 if not self.tagstack:
403 raise compat_HTMLParseError('no tags
in the stack
')
405 inner_tag = self.tagstack.pop()
409 raise compat_HTMLParseError(f'matching opening tag
for closing {tag} tag
not found
')
410 if not self.tagstack:
411 raise self.HTMLBreakOnClosingTagException()
414 # XXX: This should be far less strict
415 def get_element_text_and_html_by_tag(tag, html):
417 For the first element with the specified tag in the passed HTML document
418 return its' content (text
) and the whole
element (html
)
420 def find_or_raise(haystack, needle, exc):
422 return haystack.index(needle)
425 closing_tag = f'</{tag}>'
426 whole_start = find_or_raise(
427 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
428 content_start = find_or_raise(
429 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
430 content_start += whole_start + 1
431 with HTMLBreakOnClosingTagParser() as parser:
432 parser.feed(html[whole_start:content_start])
433 if not parser.tagstack or parser.tagstack[0] != tag:
434 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
435 offset = content_start
436 while offset < len(html):
437 next_closing_tag_start = find_or_raise(
438 html[offset:], closing_tag,
439 compat_HTMLParseError(f'closing {tag} tag not found'))
440 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
442 parser.feed(html[offset:offset + next_closing_tag_end])
443 offset += next_closing_tag_end
444 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
445 return html[content_start:offset + next_closing_tag_start], \
446 html[whole_start:offset + next_closing_tag_end]
447 raise compat_HTMLParseError('unexpected end of html')
450 class HTMLAttributeParser(html.parser.HTMLParser):
451 """Trivial HTML parser to gather the attributes
for a single element
"""
455 html.parser.HTMLParser.__init__(self)
457 def handle_starttag(self, tag, attrs):
458 self.attrs = dict(attrs)
459 raise compat_HTMLParseError('done')
462 class HTMLListAttrsParser(html.parser.HTMLParser):
463 """HTML parser to gather the attributes
for the elements of a
list"""
466 html.parser.HTMLParser.__init__(self)
470 def handle_starttag(self, tag, attrs):
471 if tag == 'li' and self._level == 0:
472 self.items.append(dict(attrs))
475 def handle_endtag(self, tag):
479 def extract_attributes(html_element):
480 """Given a string
for an HTML element such
as
482 a
="foo" B
="bar" c
="&98;az" d
=boz
483 empty
= noval entity
="&"
486 Decode
and return a dictionary of attributes
.
488 'a': 'foo', 'b': 'bar', c
: 'baz', d
: 'boz',
489 'empty': '', 'noval': None, 'entity': '&',
490 'sq': '"', 'dq': '\''
493 parser = HTMLAttributeParser()
494 with contextlib.suppress(compat_HTMLParseError):
495 parser.feed(html_element)
500 def parse_list(webpage):
501 """Given a string
for an series of HTML
<li
> elements
,
502 return a dictionary of their attributes
"""
503 parser = HTMLListAttrsParser()
509 def clean_html(html):
510 """Clean an HTML snippet into a readable string
"""
512 if html is None: # Convenience for sanitizing descriptions etc.
515 html = re.sub(r'\s+', ' ', html)
516 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
517 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
519 html = re.sub('<.*?>', '', html)
520 # Replace html entities
521 html = unescapeHTML(html)
525 class LenientJSONDecoder(json.JSONDecoder):
527 def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
528 self.transform_source, self.ignore_extra = transform_source, ignore_extra
529 self._close_attempts = 2 * close_objects
530 super().__init__(*args, **kwargs)
533 def _close_object(err):
534 doc = err.doc[:err.pos]
535 # We need to add comma first to get the correct error message
536 if err.msg.startswith('Expecting \',\''):
538 elif not doc.endswith(','):
541 if err.msg.startswith('Expecting property name'):
542 return doc[:-1] + '}'
543 elif err.msg.startswith('Expecting value'):
544 return doc[:-1] + ']'
547 if self.transform_source:
548 s = self.transform_source(s)
549 for attempt in range(self._close_attempts + 1):
551 if self.ignore_extra:
552 return self.raw_decode(s.lstrip())[0]
553 return super().decode(s)
554 except json.JSONDecodeError as e:
557 elif attempt < self._close_attempts:
558 s = self._close_object(e)
561 raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
562 assert False, 'Too many attempts to decode JSON'
565 def sanitize_open(filename, open_mode):
566 """Try to
open the given filename
, and slightly tweak it
if this fails
.
568 Attempts to
open the given filename
. If this fails
, it tries to change
569 the filename slightly
, step by step
, until it
's either able to open it
570 or it fails and raises a final exception, like the standard open()
573 It returns the tuple (stream, definitive_file_name).
576 if sys.platform == 'win32
':
579 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
580 with contextlib.suppress(io.UnsupportedOperation):
581 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
582 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
584 for attempt in range(2):
587 if sys.platform == 'win32
':
588 # FIXME: An exclusive lock also locks the file from being read.
589 # Since windows locks are mandatory, don't lock the
file on
windows (for now
).
590 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
591 raise LockingUnsupportedError()
592 stream
= locked_file(filename
, open_mode
, block
=False).__enter
__()
594 stream
= open(filename
, open_mode
)
595 return stream
, filename
596 except OSError as err
:
597 if attempt
or err
.errno
in (errno
.EACCES
,):
599 old_filename
, filename
= filename
, sanitize_path(filename
)
600 if old_filename
== filename
:
604 def timeconvert(timestr
):
605 """Convert RFC 2822 defined time string into system timestamp"""
607 timetuple
= email
.utils
.parsedate_tz(timestr
)
608 if timetuple
is not None:
609 timestamp
= email
.utils
.mktime_tz(timetuple
)
613 def sanitize_filename(s
, restricted
=False, is_id
=NO_DEFAULT
):
614 """Sanitizes a string so it could be used as part of a filename.
615 @param restricted Use a stricter subset of allowed characters
616 @param is_id Whether this is an ID that should be kept unchanged if possible.
617 If unset, yt-dlp's new sanitization rules are in effect
622 def replace_insane(char
):
623 if restricted
and char
in ACCENT_CHARS
:
624 return ACCENT_CHARS
[char
]
625 elif not restricted
and char
== '\n':
627 elif is_id
is NO_DEFAULT
and not restricted
and char
in '"*:<>?|/\\':
628 # Replace with their full-width unicode counterparts
629 return {'/': '\u29F8', '\\': '\u29f9'}
.get(char
, chr(ord(char
) + 0xfee0))
630 elif char
== '?' or ord(char
) < 32 or ord(char
) == 127:
633 return '' if restricted
else '\''
635 return '\0_\0-' if restricted
else '\0 \0-'
636 elif char
in '\\/|*<>':
638 if restricted
and (char
in '!&\'()[]{}$;`^,#' or char
.isspace() or ord(char
) > 127):
639 return '' if unicodedata
.category(char
)[0] in 'CM' else '\0_'
642 # Replace look-alike Unicode glyphs
643 if restricted
and (is_id
is NO_DEFAULT
or not is_id
):
644 s
= unicodedata
.normalize('NFKC', s
)
645 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
) # Handle timestamps
646 result
= ''.join(map(replace_insane
, s
))
647 if is_id
is NO_DEFAULT
:
648 result
= re
.sub(r
'(\0.)(?:(?=\1)..)+', r
'\1', result
) # Remove repeated substitute chars
649 STRIP_RE
= r
'(?:\0.|[ _-])*'
650 result
= re
.sub(f
'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result
) # Remove substitute chars from start/end
651 result
= result
.replace('\0', '') or '_'
654 while '__' in result
:
655 result
= result
.replace('__', '_')
656 result
= result
.strip('_')
657 # Common case of "Foreign band name - English song title"
658 if restricted
and result
.startswith('-_'):
660 if result
.startswith('-'):
661 result
= '_' + result
[len('-'):]
662 result
= result
.lstrip('.')
668 def sanitize_path(s
, force
=False):
669 """Sanitizes and normalizes path on Windows"""
670 # XXX: this handles drive relative paths (c:sth) incorrectly
671 if sys
.platform
== 'win32':
673 drive_or_unc
, _
= os
.path
.splitdrive(s
)
679 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
683 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
684 for path_part
in norm_path
]
686 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
687 elif force
and s
and s
[0] == os
.path
.sep
:
688 sanitized_path
.insert(0, os
.path
.sep
)
689 # TODO: Fix behavioral differences <3.12
690 # The workaround using `normpath` only superficially passes tests
691 # Ref: https://github.com/python/cpython/pull/100351
692 return os
.path
.normpath(os
.path
.join(*sanitized_path
))
695 def sanitize_url(url
, *, scheme
='http'):
696 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
697 # the number of unwanted failures due to missing protocol
700 elif url
.startswith('//'):
701 return f
'{scheme}:{url}'
702 # Fix some common typos seen so far
704 # https://github.com/ytdl-org/youtube-dl/issues/15649
705 (r
'^httpss://', r
'https://'),
706 # https://bx1.be/lives/direct-tv/
707 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
709 for mistake
, fixup
in COMMON_TYPOS
:
710 if re
.match(mistake
, url
):
711 return re
.sub(mistake
, fixup
, url
)
715 def extract_basic_auth(url
):
716 parts
= urllib
.parse
.urlsplit(url
)
717 if parts
.username
is None:
719 url
= urllib
.parse
.urlunsplit(parts
._replace
(netloc
=(
720 parts
.hostname
if parts
.port
is None
721 else '%s:%d' % (parts
.hostname
, parts
.port
))))
722 auth_payload
= base64
.b64encode(
723 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode())
724 return url
, f
'Basic {auth_payload.decode()}'
728 """Expand shell variables and ~"""
729 return os
.path
.expandvars(compat_expanduser(s
))
732 def orderedSet(iterable
, *, lazy
=False):
733 """Remove all duplicates from the input iterable"""
735 seen
= [] # Do not use set since the items can be unhashable
741 return _iter() if lazy
else list(_iter())
744 def _htmlentity_transform(entity_with_semicolon
):
745 """Transforms an HTML entity to a character."""
746 entity
= entity_with_semicolon
[:-1]
748 # Known non-numeric HTML entity
749 if entity
in html
.entities
.name2codepoint
:
750 return chr(html
.entities
.name2codepoint
[entity
])
752 # TODO: HTML5 allows entities without a semicolon.
    # E.g. '&Eacute;ric' should be decoded as 'Éric'.
754 if entity_with_semicolon
in html
.entities
.html5
:
755 return html
.entities
.html5
[entity_with_semicolon
]
757 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
759 numstr
= mobj
.group(1)
760 if numstr
.startswith('x'):
762 numstr
= '0%s' % numstr
765 # See https://github.com/ytdl-org/youtube-dl/issues/7518
766 with contextlib
.suppress(ValueError):
767 return chr(int(numstr
, base
))
769 # Unknown entity in name, return its literal representation
770 return '&%s;' % entity
776 assert isinstance(s
, str)
779 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
def escapeHTML(text):
    """Escape the characters in text that are special in HTML.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with their entity
    references and returns the resulting string.
    """
    return (
        text
        # '&' must be handled first so that the entities produced by the
        # later replacements are not escaped a second time
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
class netrc_from_content(netrc.netrc):
    """netrc parser that reads its entries from a string instead of a file.

    The base-class initializer is not invoked; ``hosts`` and ``macros`` are
    set up here and then filled in by the inherited ``_parse``.
    """

    def __init__(self, content):
        self.hosts = {}
        self.macros = {}
        stream = io.StringIO(content)
        try:
            # '-' is passed in place of a file name since there is no file on disk
            self._parse('-', stream, False)
        finally:
            stream.close()
800 class Popen(subprocess
.Popen
):
801 if sys
.platform
== 'win32':
802 _startupinfo
= subprocess
.STARTUPINFO()
803 _startupinfo
.dwFlags |
= subprocess
.STARTF_USESHOWWINDOW
808 def _fix_pyinstaller_ld_path(env
):
809 """Restore LD_LIBRARY_PATH when using PyInstaller
810 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
811 https://github.com/yt-dlp/yt-dlp/issues/4573
813 if not hasattr(sys
, '_MEIPASS'):
817 orig
= env
.get(f
'{key}_ORIG')
823 _fix('LD_LIBRARY_PATH') # Linux
824 _fix('DYLD_LIBRARY_PATH') # macOS
826 def __init__(self
, args
, *remaining
, env
=None, text
=False, shell
=False, **kwargs
):
828 env
= os
.environ
.copy()
829 self
._fix
_pyinstaller
_ld
_path
(env
)
831 self
.__text
_mode
= kwargs
.get('encoding') or kwargs
.get('errors') or text
or kwargs
.get('universal_newlines')
833 kwargs
['universal_newlines'] = True # For 3.6 compatibility
834 kwargs
.setdefault('encoding', 'utf-8')
835 kwargs
.setdefault('errors', 'replace')
837 if shell
and compat_os_name
== 'nt' and kwargs
.get('executable') is None:
838 if not isinstance(args
, str):
839 args
= ' '.join(compat_shlex_quote(a
) for a
in args
)
841 args
= f
'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
843 super().__init
__(args
, *remaining
, env
=env
, shell
=shell
, **kwargs
, startupinfo
=self
._startupinfo
)
846 comspec
= os
.environ
.get('ComSpec') or os
.path
.join(
847 os
.environ
.get('SystemRoot', ''), 'System32', 'cmd.exe')
848 if os
.path
.isabs(comspec
):
850 raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
852 def communicate_or_kill(self
, *args
, **kwargs
):
854 return self
.communicate(*args
, **kwargs
)
855 except BaseException
: # Including KeyboardInterrupt
856 self
.kill(timeout
=None)
859 def kill(self
, *, timeout
=0):
862 self
.wait(timeout
=timeout
)
865 def run(cls
, *args
, timeout
=None, **kwargs
):
866 with cls(*args
, **kwargs
) as proc
:
867 default
= '' if proc
.__text
_mode
else b
''
868 stdout
, stderr
= proc
.communicate_or_kill(timeout
=timeout
)
869 return stdout
or default
, stderr
or default
, proc
.returncode
def encodeArgument(s):
    """Return s as a str; any other input is decoded with ``.decode('ascii')``."""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into a (hours, minutes, seconds, milliseconds) tuple.

    Hours are not capped, so durations of a day or more simply yield hours >= 24.
    """
    total_seconds, milliseconds = divmod(msec, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return _timetuple(hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)
889 def formatSeconds(secs
, delim
=':', msec
=False):
890 time
= timetuple_from_msec(secs
* 1000)
892 ret
= '%d%s%02d%s%02d' % (time
.hours
, delim
, time
.minutes
, delim
, time
.seconds
)
894 ret
= '%d%s%02d' % (time
.minutes
, delim
, time
.seconds
)
896 ret
= '%d' % time
.seconds
897 return '%s.%03d' % (ret
, time
.milliseconds
) if msec
else ret
def bug_reports_message(before=';'):
    """Build the "please report this issue" text, appended to ``before``.

    @param before  Text that precedes the message; trailing whitespace is
                   ignored. If it is empty or ends a sentence ('.', '!', '?'),
                   the message starts with a capital letter.
    @returns       ``before`` and the message joined by one space, or just
                   the message when ``before`` is empty
    """
    from ..update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    prefix = before.rstrip()
    starts_new_sentence = not prefix or prefix.endswith(('.', '!', '?'))
    if starts_new_sentence:
        msg = msg[0].title() + msg[1:]

    if prefix:
        return prefix + ' ' + msg
    return msg
913 class YoutubeDLError(Exception):
914 """Base exception for YoutubeDL errors."""
917 def __init__(self
, msg
=None):
920 elif self
.msg
is None:
921 self
.msg
= type(self
).__name
__
922 super().__init
__(self
.msg
)
925 class ExtractorError(YoutubeDLError
):
926 """Error during info extraction."""
928 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
929 """ tb, if given, is the original traceback (so that it can be printed out).
930 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
932 from ..networking
.exceptions
import network_exceptions
933 if sys
.exc_info()[0] in network_exceptions
:
936 self
.orig_msg
= str(msg
)
938 self
.expected
= expected
940 self
.video_id
= video_id
942 self
.exc_info
= sys
.exc_info() # preserve original exception
943 if isinstance(self
.exc_info
[1], ExtractorError
):
944 self
.exc_info
= self
.exc_info
[1].exc_info
945 super().__init
__(self
.__msg
)
950 format_field(self
.ie
, None, '[%s] '),
951 format_field(self
.video_id
, None, '%s: '),
953 format_field(self
.cause
, None, ' (caused by %r)'),
954 '' if self
.expected
else bug_reports_message()))
956 def format_traceback(self
):
957 return join_nonempty(
958 self
.traceback
and ''.join(traceback
.format_tb(self
.traceback
)),
959 self
.cause
and ''.join(traceback
.format_exception(None, self
.cause
, self
.cause
.__traceback
__)[1:]),
962 def __setattr__(self
, name
, value
):
963 super().__setattr
__(name
, value
)
964 if getattr(self
, 'msg', None) and name
not in ('msg', 'args'):
965 self
.msg
= self
.__msg
or type(self
).__name
__
966 self
.args
= (self
.msg
, ) # Cannot be property
969 class UnsupportedError(ExtractorError
):
970 def __init__(self
, url
):
972 'Unsupported URL: %s' % url
, expected
=True)
976 class RegexNotFoundError(ExtractorError
):
977 """Error when a regex didn't match"""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Always flagged as "expected", overriding any value the caller passed
        super().__init__(msg, **{**kwargs, 'expected': True})
        # Stored as given
        self.countries = countries
class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        # Always flagged as "expected", overriding any value the caller passed;
        # a falsy msg is replaced by the default text
        message = msg or 'The channel is not currently live'
        super().__init__(message, **{**kwargs, 'expected': True})
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. It carries the error message and,
    optionally, the exc_info of the exception that caused it.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        # Stored as given; None when no original exception was supplied
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Class-level message text for this error
    msg = 'Entry not found in info'
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Rebinds msg on the instance only; the class-level text is untouched
            self.msg = f'{self.msg}: {filename}'
        super().__init__(self.msg)
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Class-level message text for this error
    msg = 'The download was cancelled'
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Replaces the message text inherited from DownloadCancelled
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
class RejectedVideoReached(DownloadCancelled):
    """ --break-match-filter triggered """
    # Replaces the message text inherited from DownloadCancelled
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Replaces the message text inherited from DownloadCancelled
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        # msg goes to the base class; expected is only stored on the instance
        super().__init__(msg)
        self.expected = expected
1075 class ThrottledDownload(ReExtractInfo
):
1076 """ Download speed below --throttled-rate. """
1077 msg
= 'The download speed is below throttle limit'
1080 super().__init
__(self
.msg
, expected
=False)
1083 class UnavailableVideoError(YoutubeDLError
):
1084 """Unavailable Format exception.
1086 This exception will be thrown when a video is requested
1087 in a format that is not available for that video.
1089 msg
= 'Unable to download video'
1091 def __init__(self
, err
=None):
1093 self
.msg
+= f
': {err}'
1094 super().__init
__(self
.msg
)
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = f'Downloaded {downloaded} bytes, expected {expected} bytes'
        super().__init__(message)
        # Both values are byte counts, as stated in the message above
        self.downloaded = downloaded
        self.expected = expected
1112 class XAttrMetadataError(YoutubeDLError
):
1113 def __init__(self
, code
=None, msg
='Unknown error'):
1114 super().__init
__(msg
)
1118 # Parsing code and msg
1119 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
1120 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
1121 self
.reason
= 'NO_SPACE'
1122 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
1123 self
.reason
= 'VALUE_TOO_LONG'
1125 self
.reason
= 'NOT_SUPPORTED'
1128 class XAttrUnavailableError(YoutubeDLError
):
def is_path_like(f):
    """Return True if f is a str, bytes or os.PathLike object, else False."""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1136 def extract_timezone(date_str
):
1139 ^.{8,}? # >=8 char non-TZ prefix, if present
1140 (?P<tz>Z| # just the UTC Z, or
1141 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1142 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1143 [ ]? # optional space
1144 (?P<sign>\+|-) # +/-
1145 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1149 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
1150 timezone
= TIMEZONE_NAMES
.get(m
and m
.group('tz').strip())
1151 if timezone
is not None:
1152 date_str
= date_str
[:-len(m
.group('tz'))]
1153 timezone
= datetime
.timedelta(hours
=timezone
or 0)
1155 date_str
= date_str
[:-len(m
.group('tz'))]
1156 if not m
.group('sign'):
1157 timezone
= datetime
.timedelta()
1159 sign
= 1 if m
.group('sign') == '+' else -1
1160 timezone
= datetime
.timedelta(
1161 hours
=sign
* int(m
.group('hours')),
1162 minutes
=sign
* int(m
.group('minutes')))
1163 return timezone
, date_str
1166 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
1167 """ Return a UNIX timestamp from the given date """
1169 if date_str
is None:
1172 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
1174 if timezone
is None:
1175 timezone
, date_str
= extract_timezone(date_str
)
1177 with contextlib
.suppress(ValueError):
1178 date_format
= f
'%Y-%m-%d{delimiter}%H:%M:%S'
1179 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
1180 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Return the module-level list of date formats for the given day/month order.

    The list object itself is returned, not a copy.
    """
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every known format is tried; the last one that parses wins
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, expression).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')

    return None if upload_date is None else str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a loosely formatted date/time string into a UNIX timestamp.

    Returns None when `date_str` is not a str or cannot be parsed.
    """
    if not isinstance(date_str, str):
        return None

    # Drop commas, pipes and weekday names, then collapse whitespace runs
    without_weekday = re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)
    date_str = re.sub(r'\s+', ' ', without_weekday)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if tz_match:
        date_str = date_str[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nano_match = re.search(
        r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if nano_match:
        date_str = nano_match.group(1)

    pm_offset = datetime.timedelta(hours=pm_delta)
    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_offset
            return calendar.timegm(dt.timetuple())
        except ValueError:
            continue

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
    return None
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from `url`, or return `default_ext`."""
    if url is None or '.' not in url:
        return default_ext

    # Text after the last dot of the part preceding the query string
    without_query = url.partition('?')[0]
    guess = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by replacing the extension with `<sub_lang>.<sub_format>`."""
    subtitle_ext = f'{sub_lang}.{sub_format}'
    return replace_extension(filename, subtitle_ext, expected_real_ext)
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is None:
        # Plain DATE without an offset
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    start_time = datetime_from_str(match.group('start'), precision, format)
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')

    if unit in ('month', 'year'):
        # Calendar-aware arithmetic; the result is later rounded to days
        new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        new_date = start_time + datetime.timedelta(**{unit + 's': amount})

    return datetime_round(new_date, unit) if auto_precision else new_date
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if not re.fullmatch(allowed, date_str):
            raise ValueError(f'Invalid date format "{date_str}"')
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months."""
    # Work on a zero-based month index so divmod handles year roll-over
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    year = dt.year + year_shift
    month = month_index + 1
    # Clamp the day to the length of the target month
    last_day = calendar.monthrange(year, month)[1]
    return dt.replace(year, month, min(dt.day, last_day))
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = unit_seconds[precision]
    seconds = calendar.timegm(dt.timetuple())
    # Round to the nearest multiple of `step`; exact halves go up
    timestamp = ((seconds + step / 2) // step) * step
    return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        # Anything else is passed through unchanged
        return date_str
    year, month, day = match.groups()
    return '-'.join((year, month, day))
class DateRange:
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # An omitted bound falls back to the earliest/latest representable date
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start, strict=True))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end, strict=True))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    def __eq__(self, other):
        if not isinstance(other, DateRange):
            return False
        return self.start == other.start and self.end == other.end
@functools.cache
def system_identifier():
    """Build a one-line description of the Python runtime, platform, OpenSSL and libc."""
    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]

    libc_ver = []
    with contextlib.suppress(OSError):  # We may not have access to the executable
        libc_ver = platform.libc_ver()

    fields = (
        platform.python_version(),
        implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
    return 'Python %s (%s %s %s) - %s (%s%s)' % fields
def get_windows_version():
    ''' Get Windows version. returns () if it's not running on Windows '''
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
def write_string(s, out=None, encoding=None):
    """Write the str `s` to `out` (sys.stderr by default) and flush it.

    Binary streams, and text streams exposing `.buffer`, receive encoded
    bytes with unencodable characters dropped; other streams receive `s`
    as is.
    """
    assert isinstance(s, str)
    out = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not out:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        s = re.sub(r'([\r\n]+)', r' \1', s)

    target, enc = out, None
    # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
    stream_mode = getattr(out, 'mode', None) or ''
    if 'b' in stream_mode:
        enc = encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        target = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()

    payload = s.encode(enc, 'ignore') if enc else s
    target.write(payload)
    out.flush()
# TODO: Use global logger
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """Report a deprecation.

    In CLI mode each distinct `msg` is reported once, through `printer` if
    given, otherwise through write_string. Outside the CLI a
    DeprecationWarning is issued instead.
    """
    from .. import _IN_CLI

    if not _IN_CLI:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
        return None

    seen = deprecation_warning._cache
    if msg in seen:
        return None
    seen.add(msg)

    text = f'{msg}{bug_reports_message()}'
    if printer:
        return printer(text, **kwargs)
    return write_string(f'ERROR: {text}\n', **kwargs)


deprecation_warning._cache = set()
def bytes_to_intlist(bs):
    """Convert a bytes-like object or a str into a list of integer code points."""
    if not bs:
        return []
    # Indexing bytes yields ints already; str characters need ord()
    if not isinstance(bs[0], int):
        return [ord(c) for c in bs]
    return list(bs)
def intlist_to_bytes(xs):
    """Pack a sequence of integers (0-255) into a bytes object."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return struct.pack(fmt, *xs)
class LockingUnsupportedError(OSError):
    """OSError carrying the fixed message stored in `msg`."""

    msg = 'File locking is not supported'

    def __init__(self):
        # The exception takes no arguments; the message is always `msg`
        message = self.msg
        super().__init__(message)
1478 # Cross-platform file locking
1479 if sys
.platform
== 'win32':
1481 import ctypes
.wintypes
1484 class OVERLAPPED(ctypes
.Structure
):
1486 ('Internal', ctypes
.wintypes
.LPVOID
),
1487 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
1488 ('Offset', ctypes
.wintypes
.DWORD
),
1489 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
1490 ('hEvent', ctypes
.wintypes
.HANDLE
),
1493 kernel32
= ctypes
.WinDLL('kernel32')
1494 LockFileEx
= kernel32
.LockFileEx
1495 LockFileEx
.argtypes
= [
1496 ctypes
.wintypes
.HANDLE
, # hFile
1497 ctypes
.wintypes
.DWORD
, # dwFlags
1498 ctypes
.wintypes
.DWORD
, # dwReserved
1499 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1500 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1501 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1503 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
1504 UnlockFileEx
= kernel32
.UnlockFileEx
1505 UnlockFileEx
.argtypes
= [
1506 ctypes
.wintypes
.HANDLE
, # hFile
1507 ctypes
.wintypes
.DWORD
, # dwReserved
1508 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
1509 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
1510 ctypes
.POINTER(OVERLAPPED
) # Overlapped
1512 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
1513 whole_low
= 0xffffffff
1514 whole_high
= 0x7fffffff
1516 def _lock_file(f
, exclusive
, block
):
1517 overlapped
= OVERLAPPED()
1518 overlapped
.Offset
= 0
1519 overlapped
.OffsetHigh
= 0
1520 overlapped
.hEvent
= 0
1521 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
1523 if not LockFileEx(msvcrt
.get_osfhandle(f
.fileno()),
1524 (0x2 if exclusive
else 0x0) |
(0x0 if block
else 0x1),
1525 0, whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1526 # NB: No argument form of "ctypes.FormatError" does not work on PyPy
1527 raise BlockingIOError(f
'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
1529 def _unlock_file(f
):
1530 assert f
._lock
_file
_overlapped
_p
1531 handle
= msvcrt
.get_osfhandle(f
.fileno())
1532 if not UnlockFileEx(handle
, 0, whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
1533 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
1539 def _lock_file(f
, exclusive
, block
):
1540 flags
= fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
1542 flags |
= fcntl
.LOCK_NB
1544 fcntl
.flock(f
, flags
)
1545 except BlockingIOError
:
1547 except OSError: # AOSP does not have flock()
1548 fcntl
.lockf(f
, flags
)
1550 def _unlock_file(f
):
1551 with contextlib
.suppress(OSError):
1552 return fcntl
.flock(f
, fcntl
.LOCK_UN
)
1553 with contextlib
.suppress(OSError):
1554 return fcntl
.lockf(f
, fcntl
.LOCK_UN
) # AOSP does not have flock()
1555 return fcntl
.flock(f
, fcntl
.LOCK_UN | fcntl
.LOCK_NB
) # virtiofs needs LOCK_NB on unlocking
1559 def _lock_file(f
, exclusive
, block
):
1560 raise LockingUnsupportedError()
1562 def _unlock_file(f
):
1563 raise LockingUnsupportedError()
class locked_file:
    """File object wrapper that holds a lock on the file while used as a context manager."""

    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        if not writable:
            access = os.O_RDONLY
        elif readable:
            access = os.O_RDWR
        else:
            access = os.O_WRONLY

        flags = access
        flags |= getattr(os, 'O_CLOEXEC', 0)  # UNIX only
        flags |= getattr(os, 'O_BINARY', 0)  # Windows only
        flags |= getattr(os, 'O_NOINHERIT', 0)  # Windows only
        if writable:
            flags |= os.O_CREAT  # O_TRUNC only after locking
        if 'a' in mode:
            flags |= os.O_APPEND
        if 'x' in mode:
            flags |= os.O_EXCL

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)

    def __enter__(self):
        # Readers take a shared lock, writers an exclusive one
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            try:
                self.f.truncate()
            except OSError as e:
                tolerated = (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                )
                if e.errno not in tolerated:
                    raise
        return self

    def unlock(self):
        """Release the lock if it is currently held."""
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()

    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        # Anything not defined here is delegated to the wrapped file object
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
def shell_quote(args):
    """Quote every argument and join them with spaces into one string."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(a)) for a in args])
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` is JSON-encoded into the URL fragment. If `url` already carries
    smuggled data, the two are merged and the values already in the URL
    take precedence. The caller's `data` mapping is left unmodified.
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a fresh dict instead of updating the argument in place
    merged = {**data, **idata}
    sdata = urllib.parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into `(url, data)`.

    URLs without smuggled data are returned as `(smug_url, default)`.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """ Formats numbers with decimal suffixes like K, M, etc

    @param num     The number to format; None or negative values give None
    @param fmt     %-format receiving (scaled number, suffix)
    @param factor  Base of the suffixes; 1024 selects "Ki", "Mi", ... forms
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    if num == 0:
        exponent = 0
    else:
        # Clamp to the available suffixes. The lower bound matters for values
        # below 1/factor, whose negative exponent would otherwise index the
        # suffix list from the end
        exponent = max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
def format_bytes(bytes):
    """Format a byte count with two decimals and a binary suffix, or 'N/A'."""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
def lookup_unit_table(unit_table, s, strict=False):
    """Parse "<number><unit>" from `s` and scale it by `unit_table[unit]`.

    Unless `strict`, a comma is accepted as decimal separator and only the
    start of `s` has to match. Returns the rounded product, or None when
    `s` does not match.
    """
    if strict:
        num_re, matcher = NUMBER_RE, re.fullmatch
    else:
        num_re, matcher = NUMBER_RE.replace(R'\.', '[,.]'), re.match
    units_re = '|'.join(map(re.escape, unit_table))

    m = matcher(rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None

    number = float(m.group('num').replace(',', '.'))
    return round(number * unit_table[m.group('unit')])
def parse_bytes(s):
    """Parse a string indicating a byte quantity into an integer"""
    # Suffix -> multiplier: '' is 1, K is 1024, M is 1024**2, ...
    multipliers = {
        unit: 1024**power
        for power, unit in enumerate(['', *'KMGTPEZY'])
    }
    return lookup_unit_table(multipliers, s.upper(), strict=True)
1705 def parse_filesize(s
):
1709 # The lower-case forms are of course incorrect and unofficial,
1710 # but we support those too
1727 'megabytes': 1000 ** 2,
1728 'mebibytes': 1024 ** 2,
1734 'gigabytes': 1000 ** 3,
1735 'gibibytes': 1024 ** 3,
1741 'terabytes': 1000 ** 4,
1742 'tebibytes': 1024 ** 4,
1748 'petabytes': 1000 ** 5,
1749 'pebibytes': 1024 ** 5,
1755 'exabytes': 1000 ** 6,
1756 'exbibytes': 1024 ** 6,
1762 'zettabytes': 1000 ** 7,
1763 'zebibytes': 1024 ** 7,
1769 'yottabytes': 1000 ** 8,
1770 'yobibytes': 1024 ** 8,
1773 return lookup_unit_table(_UNIT_TABLE
, s
)
1780 s
= re
.sub(r
'^[^\d]+\s', '', s
).strip()
1782 if re
.match(r
'^[\d,.]+$', s
):
1783 return str_to_int(s
)
1796 ret
= lookup_unit_table(_UNIT_TABLE
, s
)
1800 mobj
= re
.match(r
'([\d,.]+)(?:$|\s)', s
)
1802 return str_to_int(mobj
.group(1))
def parse_resolution(s, *, lenient=False):
    """Extract a resolution from `s` as a dict with 'width' and/or 'height'.

    Understands "WxH", "<height>p"/"<height>i" and "4k"/"8k". Unless
    `lenient`, "WxH" must not be adjacent to other alphanumerics. Returns
    {} when nothing is found or `s` is None.
    """
    if s is None:
        return {}

    wxh_re = (
        r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)' if lenient
        else r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')
    mobj = re.search(wxh_re, s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

    return {}
def parse_bitrate(s):
    """Return the integer preceding "kbps" in `s`, or None."""
    if not isinstance(s, str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    # Languages without a table fall back to the English names
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in month_names:
        return None
    return month_names.index(name) + 1
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """
    for number, month_name in enumerate(ENGLISH_MONTH_NAMES, start=1):
        # The abbreviation is the first three letters of the full name
        if month_name[:3] == abbrev:
            return number
    return None
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Ampersands that already start an entity or character reference are kept
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
def setproctitle(title):
    """Set the process name through prctl(PR_SET_NAME), best effort.

    Returns silently when ctypes or 'libc.so.6' cannot be loaded, or when
    the loaded libc has no prctl.
    """
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initialising the buffer from the bytes allocates one extra byte and
    # NUL-terminates it. A buffer sized exactly len(title) has no room for
    # the terminator prctl relies on
    buf = ctypes.create_string_buffer(title.encode())
    try:
        # PR_SET_NAME = 15      Ref: /usr/include/linux/prctl.h
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip the prefix `start` from `s` if present; None is passed through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Strip the suffix `end` from `s` if present; None is passed through.

    An empty `end` leaves `s` unchanged.
    """
    # `end` must be non-empty: `s[:-0]` is `s[:0]`, which would wipe the string
    if end and s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding `s`."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    # An empty result is reported as None
    return remove_start(netloc, 'www.') or None
def url_basename(url):
    """Return the last path segment of `url`, ignoring surrounding slashes."""
    segments = urllib.parse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
def base_url(url):
    """Return the http(s) URL up to and including the last '/' before any query or fragment."""
    directory = re.match(r'https?://[^?#]+/', url)
    return directory.group()
def urljoin(base, path):
    """Join `path` onto `base`, returning None for unusable input.

    A `path` that already has a scheme or starts with '//' is returned as
    is. Otherwise `base` must be an http(s) or protocol-relative URL.
    bytes arguments are decoded first.
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not (isinstance(path, str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` to int, scaled by `invscale // scale`, or return `default`.

    With `get_attr`, the named attribute of `v` is converted instead.
    """
    value = v
    if get_attr and value is not None:
        value = getattr(value, get_attr, None)
    try:
        converted = int(value)
        return converted * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
def str_or_none(v, default=None):
    """Return `str(v)`, or `default` when `v` is None."""
    if v is None:
        return default
    return str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, int):
        return int_str
    if not isinstance(int_str, str):
        return None
    # Drop thousands separators and plus signs before converting
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to float, scaled by `invscale / scale`, or return `default`.

    `default` is returned for None and for values that cannot be converted,
    including integers too large for a float.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int too large for float(), as handled in int_or_none
        return default
def bool_or_none(v, default=None):
    """Return `v` when it is a bool, else `default`."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return `v.strip()` when `v` is a str, else `default`."""
    if not isinstance(v, str):
        return default
    return v.strip()
def url_or_none(url):
    """Return the stripped `url` if it starts with a recognized scheme or '//', else None."""
    if not (url and isinstance(url, str)):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format `timestamp` with `date_format`, or return `default` on failure.

    @param timestamp    int/float UNIX timestamp (formatted as UTC) or a
                        "YYYYMMDD" string
    @param date_format  strftime format; "%s" is expanded manually
    @param default      Returned for unsupported types and for unparsable
                        or out-of-range values
    """
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Also, datetime.datetime.fromtimestamp breaks for negative timestamps
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
            datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
                               + datetime.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError):
        # OverflowError: timestamp outside the representable datetime range,
        # e.g. milliseconds passed as seconds
        return default
2009 def parse_duration(s
):
2010 if not isinstance(s
, str):
2016 days
, hours
, mins
, secs
, ms
= [None] * 5
2017 m
= re
.match(r
'''(?x)
2019 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2020 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2021 (?P<ms>[.:][0-9]+)?Z?$
2024 days
, hours
, mins
, secs
, ms
= m
.group('days', 'hours', 'mins', 'secs', 'ms')
2029 [0-9]+\s*y(?:ears?)?,?\s*
2032 [0-9]+\s*m(?:onths?)?,?\s*
2035 [0-9]+\s*w(?:eeks?)?,?\s*
2038 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2042 (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
2045 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2048 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2051 days
, hours
, mins
, secs
, ms
= m
.groups()
2053 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
2055 hours
, mins
= m
.groups()
2060 ms
= ms
.replace(':', '.')
2061 return sum(float(part
or 0) * mult
for part
, mult
in (
2062 (days
, 86400), (hours
, 3600), (mins
, 60), (secs
, 1), (ms
, 1)))
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension of `filename`.

    If `expected_real_ext` is given and differs from the real extension,
    `ext` is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return f'{filename}.{ext}'
    return f'{name}.{ext}{real_ext}'
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of `filename` with `ext`.

    If `expected_real_ext` is given and differs from the real extension,
    `ext` is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{}.{}'.format(stem, ext)
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        Popen.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # The binary could not be started
        return False
    return exe
def _get_exe_version_output(exe, args):
    """Run `exe` with `args` and return its combined stdout/stderr text.

    Returns None on a non-zero exit status and False when the executable
    cannot be started.
    """
    command = [encodeArgument(exe)] + args
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, ret = Popen.run(
            command, text=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        return False
    return None if ret else stdout
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract the version from `output`, or return `unrecognized` if none is found.

    `version_re` must capture the version in group 1; by default the token
    following "version" is used.
    """
    assert isinstance(output, str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    m = re.search(pattern, output)
    return m.group(1) if m else unrecognized
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    out = _get_exe_version_output(exe, args)
    if out is None:
        # The executable ran but produced no usable output
        return unrecognized[-1]
    if not out:
        return out
    return detect_exe_version(out, version_re, unrecognized[0])
def frange(start=0, stop=None, step=1):
    """Float range"""
    if stop is None:
        start, stop = 0, start
    # Direction of travel; a zero step yields nothing
    if not step:
        sign = 0
    elif step > 0:
        sign = 1
    else:
        sign = -1
    current = start
    while sign * current < sign * stop:
        yield current
        current += step
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        self._iterable = iter(iterable)
        self._cache = _cache if _cache is not None else []
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self._cache
        # Cache items as they are pulled
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        step = -1 if self._reversed else 1
        return self._exhaust()[::step]

    @staticmethod
    def _reverse_index(x):
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')

        needs_everything = (
            (start or 0) < 0 or (stop or 0) < 0
            or (start is None and step < 0)
            or (stop is None and step > 0))
        if needs_everything:
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
        else:
            # Pull just enough items to cover the highest requested index
            missing = max(start or 0, stop or 0) - len(self._cache) + 1
            if missing > 0:
                self._cache.extend(itertools.islice(self._iterable, missing))

        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
class PagedList:
    """Base class for lists fetched page by page; subclasses implement `_getslice`."""

    class IndexError(IndexError):
        pass

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Return the entries of page `pagenum` as a list, using the cache when possible."""
        page_results = self._cache.get(pagenum)
        if page_results is None:
            if pagenum > self._pagecount:
                page_results = []
            else:
                page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if entries:
            return entries[0]
        raise self.IndexError()

    def __bool__(self):
        return bool(self.getslice(0, 1))
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        pagesize = self._pagesize
        for pagenum in itertools.count(start // pagesize):
            firstid = pagenum * pagesize
            nextfirstid = firstid + pagesize
            if start >= nextfirstid:
                continue

            # Offsets inside this page; only the first/last page of the
            # requested range gets trimmed
            startv = start % pagesize if firstid <= start < nextfirstid else 0
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % pagesize) + 1
            else:
                endv = None

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        if end is None:
            end_page = self._pagecount
            only_more = None
        else:
            end_page = min(self._pagecount, end // self._pagesize + 1)
            only_more = end - start
        # Entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize

        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) >= only_more:
                    # This page completes the requested range
                    yield from page_results[:only_more]
                    break
                only_more -= len(page_results)
            yield from page_results
2336 class PlaylistEntries
:
2337 MissingEntry
= object()
2338 is_exhausted
= False
2340 def __init__(self
, ydl
, info_dict
):
2343 # _entries must be assigned now since infodict can change during iteration
2344 entries
= info_dict
.get('entries')
2346 raise EntryNotInPlaylist('There are no entries')
2347 elif isinstance(entries
, list):
2348 self
.is_exhausted
= True
2350 requested_entries
= info_dict
.get('requested_entries')
2351 self
.is_incomplete
= requested_entries
is not None
2352 if self
.is_incomplete
:
2353 assert self
.is_exhausted
2354 self
._entries
= [self
.MissingEntry
] * max(requested_entries
or [0])
2355 for i
, entry
in zip(requested_entries
, entries
):
2356 self
._entries
[i
- 1] = entry
2357 elif isinstance(entries
, (list, PagedList
, LazyList
)):
2358 self
._entries
= entries
2360 self
._entries
= LazyList(entries
)
2362 PLAYLIST_ITEMS_RE
= re
.compile(r
'''(?x)
2363 (?P<start>[+-]?\d+)?
2365 (?P<end>[+-]?\d+|inf(?:inite)?)?
2366 (?::(?P<step>[+-]?\d+))?
2370 def parse_playlist_items(cls
, string
):
2371 for segment
in string
.split(','):
2373 raise ValueError('There is two or more consecutive commas')
2374 mobj
= cls
.PLAYLIST_ITEMS_RE
.fullmatch(segment
)
2376 raise ValueError(f
'{segment!r} is not a valid specification')
2377 start
, end
, step
, has_range
= mobj
.group('start', 'end', 'step', 'range')
2378 if int_or_none(step
) == 0:
2379 raise ValueError(f
'Step in {segment!r} cannot be zero')
2380 yield slice(int_or_none(start
), float_or_none(end
), int_or_none(step
)) if has_range
else int(start
)
2382 def get_requested_items(self
):
2383 playlist_items
= self
.ydl
.params
.get('playlist_items')
2384 playlist_start
= self
.ydl
.params
.get('playliststart', 1)
2385 playlist_end
= self
.ydl
.params
.get('playlistend')
2386 # For backwards compatibility, interpret -1 as whole list
2387 if playlist_end
in (-1, None):
2389 if not playlist_items
:
2390 playlist_items
= f
'{playlist_start}:{playlist_end}'
2391 elif playlist_start
!= 1 or playlist_end
:
2392 self
.ydl
.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once
=True)
2394 for index
in self
.parse_playlist_items(playlist_items
):
2395 for i
, entry
in self
[index
]:
2400 # The item may have just been added to archive. Don't break due to it
2401 if not self
.ydl
.params
.get('lazy_playlist'):
2402 # TODO: Add auto-generated fields
2403 self
.ydl
._match
_entry
(entry
, incomplete
=True, silent
=True)
2404 except (ExistingVideoReached
, RejectedVideoReached
):
2407 def get_full_count(self
):
2408 if self
.is_exhausted
and not self
.is_incomplete
:
2410 elif isinstance(self
._entries
, InAdvancePagedList
):
2411 if self
._entries
._pagesize
== 1:
2412 return self
._entries
._pagecount
2414 @functools.cached_property
2416 if isinstance(self
._entries
, list):
2419 entry
= self
._entries
[i
]
2421 entry
= self
.MissingEntry
2422 if not self
.is_incomplete
:
2423 raise self
.IndexError()
2424 if entry
is self
.MissingEntry
:
2425 raise EntryNotInPlaylist(f
'Entry {i + 1} cannot be found')
2430 return type(self
.ydl
)._handle
_extraction
_exceptions
(lambda _
, i
: self
._entries
[i
])(self
.ydl
, i
)
2431 except (LazyList
.IndexError, PagedList
.IndexError):
2432 raise self
.IndexError()
2435 def __getitem__(self
, idx
):
2436 if isinstance(idx
, int):
2437 idx
= slice(idx
, idx
)
2439 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2440 step
= 1 if idx
.step
is None else idx
.step
2441 if idx
.start
is None:
2442 start
= 0 if step
> 0 else len(self
) - 1
2444 start
= idx
.start
- 1 if idx
.start
>= 0 else len(self
) + idx
.start
2446 # NB: Do not call len(self) when idx == [:]
2447 if idx
.stop
is None:
2448 stop
= 0 if step
< 0 else float('inf')
2450 stop
= idx
.stop
- 1 if idx
.stop
>= 0 else len(self
) + idx
.stop
2451 stop
+= [-1, 1][step
> 0]
2453 for i
in frange(start
, stop
, step
):
2457 entry
= self
._getter
(i
)
2458 except self
.IndexError:
2459 self
.is_exhausted
= True
2466 return len(tuple(self
[:]))
2468 class IndexError(IndexError):
2472 def uppercase_escape(s
):
2473 unicode_escape
= codecs
.getdecoder('unicode_escape')
2475 r
'\\U[0-9a-fA-F]{8}',
2476 lambda m
: unicode_escape(m
.group(0))[0],
2480 def lowercase_escape(s
):
2481 unicode_escape
= codecs
.getdecoder('unicode_escape')
2483 r
'\\u[0-9a-fA-F]{4}',
2484 lambda m
: unicode_escape(m
.group(0))[0],
def parse_qs(url, **kwargs):
    """Parse the query component of `url` into a dict of key -> list of values

    Any keyword arguments are forwarded unchanged to urllib.parse.parse_qs
    """
    query_string = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query_string, **kwargs)
2492 def read_batch_urls(batch_fd
):
2494 if not isinstance(url
, str):
2495 url
= url
.decode('utf-8', 'replace')
2496 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
2497 for bom
in BOM_UTF8
:
2498 if url
.startswith(bom
):
2499 url
= url
[len(bom
):]
2501 if not url
or url
.startswith(('#', ';', ']')):
2503 # "#" cannot be stripped out since it is part of the URI
2504 # However, it can be safely stripped out if following a whitespace
2505 return re
.split(r
'\s#', url
, 1)[0].rstrip()
2507 with contextlib
.closing(batch_fd
) as fd
:
2508 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data and return it as ASCII bytes

    All arguments are forwarded unchanged to urllib.parse.urlencode
    """
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
2515 def update_url(url
, *, query_update
=None, **kwargs
):
2516 """Replace URL components specified by kwargs
2517 @param url str or parse url tuple
2518 @param query_update update query
2521 if isinstance(url
, str):
2522 if not kwargs
and not query_update
:
2525 url
= urllib
.parse
.urlparse(url
)
2527 assert 'query' not in kwargs
, 'query_update and query cannot be specified at the same time'
2528 kwargs
['query'] = urllib
.parse
.urlencode({
2529 **urllib
.parse
.parse_qs(url
.query
),
2532 return urllib
.parse
.urlunparse(url
._replace
(**kwargs
))
def update_url_query(url, query):
    """Shorthand for update_url() that passes `query` as its `query_update` argument"""
    updated_url = update_url(url, query_update=query)
    return updated_url
2539 def _multipart_encode_impl(data
, boundary
):
2540 content_type
= 'multipart/form-data; boundary=%s' % boundary
2543 for k
, v
in data
.items():
2544 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
2545 if isinstance(k
, str):
2547 if isinstance(v
, str):
2549 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2550 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2551 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
2552 if boundary
.encode('ascii') in content
:
2553 raise ValueError('Boundary overlaps with data')
2556 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
2558 return out
, content_type
2561 def multipart_encode(data
, boundary
=None):
2563 Encode a dict to RFC 7578-compliant form-data
2566 A dict where keys and values can be either Unicode or bytes-like
2569 If specified a Unicode object, it's used as the boundary. Otherwise
2570 a random boundary is generated.
2572 Reference: https://tools.ietf.org/html/rfc7578
2574 has_specified_boundary
= boundary
is not None
2577 if boundary
is None:
2578 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
2581 out
, content_type
= _multipart_encode_impl(data
, boundary
)
2584 if has_specified_boundary
:
2588 return out
, content_type
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Tell whether `x` is an instance of `allowed_types` but not of `blocked_types`

    When `blocked_types` is not given, str, bytes and mappings are blocked
    """
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    # Check the allowed types first so that the blocked types are only
    # consulted for objects that already qualify
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked_types)
def variadic(x, allowed_types=NO_DEFAULT):
    """Return `x` as-is if it is iterable-like, otherwise wrap it in a 1-tuple

    `allowed_types` is handed to is_iterable_like() as its `blocked_types`,
    i.e. values of these types are wrapped rather than returned unchanged.
    Passing anything other than a tuple or a type is deprecated
    """
    if not isinstance(allowed_types, (tuple, type)):
        deprecation_warning('allowed_types should be a tuple or a type')
        allowed_types = tuple(allowed_types)
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x, )
2604 def try_call(*funcs
, expected_type
=None, args
=[], kwargs
={}):
2607 val
= f(*args
, **kwargs
)
2608 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
2611 if expected_type
is None or isinstance(val
, expected_type
):
def try_get(src, getter, expected_type=None):
    """Call the getter(s) on `src` through try_call()

    `getter` may be a single callable or several of them; it is normalized
    with variadic() and each one receives `src` as its only argument
    """
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict holding the items of `dct` for which cndn(key, value) is truthy

    By default, items whose value is None are dropped
    """
    filtered = {}
    for key, value in dct.items():
        if cndn(key, value):
            filtered[key] = value
    return filtered
2623 def merge_dicts(*dicts
):
2625 for a_dict
in dicts
:
2626 for k
, v
in a_dict
.items():
2627 if (v
is not None and k
not in merged
2628 or isinstance(v
, str) and merged
[k
] == ''):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as str, decoding it with `encoding`/`errors` if it is not one already"""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
2646 TV_PARENTAL_GUIDELINES
= {
2656 def parse_age_limit(s
):
2657 # isinstance(False, int) is True. So type() must be used instead
2658 if type(s
) is int: # noqa: E721
2659 return s
if 0 <= s
<= 21 else None
2660 elif not isinstance(s
, str):
2662 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
2664 return int(m
.group('age'))
2667 return US_RATINGS
[s
]
2668 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
2670 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
2674 def strip_jsonp(code
):
2677 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2678 (?:\s*&&\s*(?P=func_name))?
2679 \s*\(\s*(?P<callback_data>.*)\);?
2680 \s*?(?://[^\n]*)*$''',
2681 r
'\g<callback_data>', code
)
2684 def js_to_json(code
, vars={}, *, strict
=False):
2685 # vars is a dict of var, val pairs to substitute
2686 STRING_QUOTES
= '\'"`'
2687 STRING_RE
= '|'.join(rf
'{q}(?:\\.|[^\\{q}])*{q}' for q
in STRING_QUOTES
)
2688 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
2689 SKIP_RE
= fr
'\s*(?:{COMMENT_RE})?\s*'
2691 (fr
'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
2692 (fr
'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
2695 def process_escape(match
):
2696 JSON_PASSTHROUGH_ESCAPES
= R
'"\bfnrtu'
2697 escape
= match
.group(1) or match
.group(2)
2699 return (Rf
'\{escape}' if escape
in JSON_PASSTHROUGH_ESCAPES
2700 else R
'\u00' if escape
== 'x'
2701 else '' if escape
== '\n'
2704 def template_substitute(match
):
2705 evaluated
= js_to_json(match
.group(1), vars, strict
=strict
)
2706 if evaluated
[0] == '"':
2707 return json
.loads(evaluated
)
2712 if v
in ('true', 'false', 'null'):
2714 elif v
in ('undefined', 'void 0'):
2716 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
2719 if v
[0] in STRING_QUOTES
:
2720 v
= re
.sub(r
'(?s)\${([^}]+)}', template_substitute
, v
[1:-1]) if v
[0] == '`' else v
[1:-1]
2721 escaped
= re
.sub(r
'(?s)(")|\\(.)', process_escape
, v
)
2722 return f
'"{escaped}"'
2724 for regex
, base
in INTEGER_TABLE
:
2725 im
= re
.match(regex
, v
)
2727 i
= int(im
.group(1), base
)
2728 return f
'"{i}":' if v
.endswith(':') else str(i
)
2734 except json
.JSONDecodeError
:
2735 return json
.dumps(vars[v
])
2742 raise ValueError(f
'Unknown value: {v}')
2744 def create_map(mobj
):
2745 return json
.dumps(dict(json
.loads(js_to_json(mobj
.group(1) or '[]', vars=vars))))
2747 code
= re
.sub(r
'(?:new\s+)?Array\((.*?)\)', r
'[\g<1>]', code
)
2748 code
= re
.sub(r
'new Map\((\[.*?\])?\)', create_map
, code
)
2750 code
= re
.sub(rf
'new Date\(({STRING_RE})\)', r
'\g<1>', code
)
2751 code
= re
.sub(r
'new \w+\((.*?)\)', lambda m
: json
.dumps(m
.group(0)), code
)
2752 code
= re
.sub(r
'parseInt\([^\d]+(\d+)[^\d]+\)', r
'\1', code
)
2753 code
= re
.sub(r
'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^
)]*["\'])\s*\)', r'\1', code)
2755 return re.sub(rf'''(?sx)
2757 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
2758 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
2759 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
2760 [0-9]+(?={SKIP_RE}:)|
2765 def qualities(quality_ids):
2766 """ Get a numeric quality value out of a list of possible values """
2769 return quality_ids.index(qid)
2775 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
2779 'default': '%(title)s [%(id)s].%(ext)s',
2780 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
2786 'description': 'description',
2787 'annotation': 'annotations.xml',
2788 'infojson': 'info.json',
2791 'pl_thumbnail': None,
2792 'pl_description': 'description',
2793 'pl_infojson': 'info.json',
2796 # As of [1] format syntax is:
2797 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
2798 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
2799 STR_FORMAT_RE_TMPL = r'''(?x)
2800 (?<!%)(?P<prefix>(?:%%)*)
2802 (?P<has_key>\((?P<key>{0})\))?
2804 (?P<conversion>[#0\-+ ]+)?
2806 (?P<precision>\.\d+)?
2807 (?P<len_mod>[hlL])? # unused in python
2808 {1} # conversion type
2813 STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
2816 def limit_length(s, length):
2817 """ Add ellipses to overly long strings """
2822 return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a version string on "." and "-" and return its parts as a tuple of ints"""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
2830 def is_outdated_version(version, limit, assume_new=True):
2832 return not assume_new
2834 return version_tuple(version) < version_tuple(limit)
2836 return not assume_new
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    from ..update import is_non_updateable

    non_updateable = is_non_updateable()
    return not non_updateable
def args_to_str(args):
    """Get a short string representation for a subprocess command"""
    quoted_args = map(compat_shlex_quote, args)
    return ' '.join(quoted_args)
def error_to_str(err):
    """Format an exception as its class name, a colon and its message"""
    err_name = type(err).__name__
    return f'{err_name}: {err}'
2856 def mimetype2ext(mt, default=NO_DEFAULT):
2857 if not isinstance(mt, str):
2858 if default is not NO_DEFAULT:
2875 'x-matroska': 'mkv',
2877 'x-mp4-fragmented': 'mp4',
2882 # application (streaming playlists)
2886 'vnd.apple.mpegurl': 'm3u8',
2887 'vnd.ms-sstr+xml': 'ism',
2888 'x-mpegurl': 'm3u8',
2892 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
2893 # Using .mp3 as it's the most popular one
2894 'audio/mpeg': 'mp3',
2895 'audio/webm': 'webm',
2896 'audio/x-matroska': 'mka',
2897 'audio/x-mpegurl': 'm3u',
2905 'x-realaudio': 'ra',
2916 'vnd.wap.wbmp': 'wbmp',
2923 'filmstrip+json': 'fs',
2924 'smptett+xml': 'tt',
2927 'x-ms-sami': 'sami',
2936 mimetype = mt.partition(';')[0].strip().lower()
2937 _, _, subtype = mimetype.rpartition('/')
2939 ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
2942 elif default is not NO_DEFAULT:
2944 return subtype.replace('+', '.')
2947 def ext2mimetype(ext_or_url):
2950 if '.' not in ext_or_url:
2951 ext_or_url = f'file.{ext_or_url}'
2952 return mimetypes.guess_type(ext_or_url)[0]
2955 def parse_codecs(codecs_str):
2956 # http://tools.ietf.org/html/rfc6381
2959 split_codecs = list(filter(None, map(
2960 str.strip, codecs_str.strip().strip(',').split(','))))
2961 vcodec, acodec, scodec, hdr = None, None, None, None
2962 for full_codec in split_codecs:
2963 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
2964 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
2965 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
2969 if parts[0] in ('dvh1', 'dvhe'):
2971 elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
2973 elif parts[:2] == ['vp9', '2']:
2975 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
2976 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
2977 acodec = acodec or full_codec
2978 elif parts[0] in ('stpp', 'wvtt'):
2979 scodec = scodec or full_codec
2981 write_string(f'WARNING: Unknown codec {full_codec}\n')
2982 if vcodec or acodec or scodec:
2984 'vcodec': vcodec or 'none',
2985 'acodec': acodec or 'none',
2986 'dynamic_range': hdr,
2987 **({'scodec': scodec} if scodec is not None else {}),
2989 elif len(split_codecs) == 2:
2991 'vcodec': split_codecs[0],
2992 'acodec': split_codecs[1],
2997 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
2998 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3000 allow_mkv = not preferences or 'mkv' in preferences
3002 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3003 return 'mkv' # TODO: any other format allows this?
3005 # TODO: All codecs supported by parse_codecs isn't handled here
3006 COMPATIBLE_CODECS = {
3008 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
3009 'h264', 'aacl', 'ec-3', # Set in ISM
3012 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3013 'vp9x', 'vp8x', # in the webm spec
3017 sanitize_codec = functools.partial(
3018 try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
3019 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3021 for ext in preferences or COMPATIBLE_CODECS.keys():
3022 codec_set = COMPATIBLE_CODECS.get(ext, set())
3023 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3027 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3030 for ext in preferences or vexts:
3031 current_exts = {ext, *vexts, *aexts}
3032 if ext == 'mkv' or current_exts == {ext} or any(
3033 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3035 return 'mkv' if allow_mkv else preferences[-1]
3038 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
3039 getheader = url_handle.headers.get
3041 cd = getheader('Content-Disposition')
3043 m = re.match(r'attachment;\s*filename="(?P
<filename
>[^
"]+)"', cd)
3045 e = determine_ext(m.group('filename
'), default_ext=None)
3049 meta_ext = getheader('x
-amz
-meta
-name
')
3051 e = meta_ext.rpartition('.')[2]
3055 return mimetype2ext(getheader('Content
-Type
'), default=default)
def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI of the given MIME type from the bytes-like `data`"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3062 def age_restricted(content_limit, age_limit):
3063 """ Returns True iff the content should be blocked """
3065 if age_limit is None: # No limit set
3067 if content_limit is None:
3068 return False # Content available for everyone
3069 return age_limit < content_limit
3072 # List of known byte-order-marks (BOM)
3074 (b'\xef\xbb\xbf', 'utf
-8'),
3075 (b'\x00\x00\xfe\xff', 'utf
-32-be
'),
3076 (b'\xff\xfe\x00\x00', 'utf
-32-le
'),
3077 (b'\xff\xfe', 'utf
-16-le
'),
3078 (b'\xfe\xff', 'utf
-16-be
'),
3082 def is_html(first_bytes):
3083 """ Detect whether a file contains HTML by examining its first bytes. """
3086 for bom, enc in BOMS:
3087 while first_bytes.startswith(bom):
3088 encoding, first_bytes = enc, first_bytes[len(bom):]
3090 return re.match(r'^\s
*<', first_bytes.decode(encoding, 'replace
'))
3093 def determine_protocol(info_dict):
3094 protocol = info_dict.get('protocol
')
3095 if protocol is not None:
3098 url = sanitize_url(info_dict['url
'])
3099 if url.startswith('rtmp
'):
3101 elif url.startswith('mms
'):
3103 elif url.startswith('rtsp
'):
3106 ext = determine_ext(url)
3108 return 'm3u8
' if info_dict.get('is_live
') else 'm3u8_native
'
3112 return urllib.parse.urlparse(url).scheme
3115 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3116 """ Render a list of rows, each as a list of values.
3117 Text after a \t will be right aligned """
3119 return len(remove_terminal_sequences(string).replace('\t', ''))
3121 def get_max_lens(table):
3122 return [max(width(str(v)) for v in col) for col in zip(*table)]
3124 def filter_using_list(row, filterArray):
3125 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3127 max_lens = get_max_lens(data) if hide_empty else []
3128 header_row = filter_using_list(header_row, max_lens)
3129 data = [filter_using_list(row, max_lens) for row in data]
3131 table = [header_row] + data
3132 max_lens = get_max_lens(table)
3135 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3136 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3138 for pos, text in enumerate(map(str, row)):
3140 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3142 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3143 ret = '\n'.join(''.join(row).rstrip() for row in table)
3147 def _match_one(filter_part, dct, incomplete):
3148 # TODO: Generalize code with YoutubeDL._build_format_filter
3149 STRING_OPERATORS = {
3150 '*=': operator.contains,
3151 '^
=': lambda attr, value: attr.startswith(value),
3152 '$
=': lambda attr, value: attr.endswith(value),
3153 '~
=': lambda attr, value: re.search(value, attr),
3155 COMPARISON_OPERATORS = {
3157 '<=': operator.le, # "<=" must be defined above "<"
3164 if isinstance(incomplete, bool):
3165 is_incomplete = lambda _: incomplete
3167 is_incomplete = lambda k: k in incomplete
3169 operator_rex = re.compile(r'''(?x)
3171 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3173 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3176 ''' % '|
'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3177 m = operator_rex.fullmatch(filter_part.strip())
3180 unnegated_op = COMPARISON_OPERATORS[m['op
']]
3182 op = lambda attr, value: not unnegated_op(attr, value)
3185 comparison_value = m['quotedstrval
'] or m['strval
'] or m['intval
']
3187 comparison_value = comparison_value.replace(r'\
%s' % m['quote
'], m['quote
'])
3188 actual_value = dct.get(m['key
'])
3189 numeric_comparison = None
3190 if isinstance(actual_value, (int, float)):
3191 # If the original field is a string and matching comparisonvalue is
3192 # a number we should respect the origin of the original field
3193 # and process comparison value as a string (see
3194 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3196 numeric_comparison = int(comparison_value)
3198 numeric_comparison = parse_filesize(comparison_value)
3199 if numeric_comparison is None:
3200 numeric_comparison = parse_filesize(f'{comparison_value}B
')
3201 if numeric_comparison is None:
3202 numeric_comparison = parse_duration(comparison_value)
3203 if numeric_comparison is not None and m['op
'] in STRING_OPERATORS:
3204 raise ValueError('Operator
%s only supports string values
!' % m['op
'])
3205 if actual_value is None:
3206 return is_incomplete(m['key
']) or m['none_inclusive
']
3207 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3210 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3211 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3213 operator_rex = re.compile(r'''(?x)
3214 (?P<op>%s)\s*(?P<key>[a-z_]+)
3215 ''' % '|
'.join(map(re.escape, UNARY_OPERATORS.keys())))
3216 m = operator_rex.fullmatch(filter_part.strip())
3218 op = UNARY_OPERATORS[m.group('op
')]
3219 actual_value = dct.get(m.group('key
'))
3220 if is_incomplete(m.group('key
')) and actual_value is None:
3222 return op(actual_value)
3224 raise ValueError('Invalid
filter part
%r' % filter_part)
3227 def match_str(filter_str, dct, incomplete=False):
3228 """ Filter a dictionary with a simple string syntax.
3229 @returns Whether the filter passes
3230 @param incomplete Set of keys that is expected to be missing from dct.
3231 Can be True/False to indicate all/none of the keys may be missing.
3232 All conditions on incomplete keys pass if the key is missing
3235 _match_one(filter_part.replace(r'\
&', '&'), dct, incomplete)
3236 for filter_part in re.split(r'(?
<!\\)&', filter_str))
3239 def match_filter_func(filters, breaking_filters=None):
3240 if not filters and not breaking_filters:
3242 breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
3243 filters = set(variadic(filters or []))
3245 interactive = '-' in filters
3249 def _match_func(info_dict, incomplete=False):
3250 ret = breaking_filters(info_dict, incomplete)
3252 raise RejectedVideoReached(ret)
3254 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3255 return NO_DEFAULT if interactive and not incomplete else None
3257 video_title = info_dict.get('title
') or info_dict.get('id') or 'entry
'
3258 filter_str = ') |
('.join(map(str.strip, filters))
3259 return f'{video_title} does
not pass filter ({filter_str}
), skipping
..'
3263 class download_range_func:
3264 def __init__(self, chapters, ranges, from_info=False):
3265 self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
3267 def __call__(self, info_dict, ydl):
3269 warning = ('There are no chapters matching the regex
' if info_dict.get('chapters
')
3270 else 'Cannot match chapters since chapter information
is unavailable
')
3271 for regex in self.chapters or []:
3272 for i, chapter in enumerate(info_dict.get('chapters
') or []):
3273 if re.search(regex, chapter['title
']):
3275 yield {**chapter, 'index': i}
3276 if self.chapters and warning:
3277 ydl.to_screen(f'[info
] {info_dict["id"]}
: {warning}
')
3279 for start, end in self.ranges or []:
3281 'start_time
': self._handle_negative_timestamp(start, info_dict),
3282 'end_time
': self._handle_negative_timestamp(end, info_dict),
3285 if self.from_info and (info_dict.get('start_time
') or info_dict.get('end_time
')):
3287 'start_time
': info_dict.get('start_time
') or 0,
3288 'end_time
': info_dict.get('end_time
') or float('inf
'),
3290 elif not self.ranges and not self.chapters:
3294 def _handle_negative_timestamp(time, info):
3295 return max(info['duration
'] + time, 0) if info.get('duration
') and time < 0 else time
3297 def __eq__(self, other):
3298 return (isinstance(other, download_range_func)
3299 and self.chapters == other.chapters and self.ranges == other.ranges)
3302 return f'{__name__}
.{type(self).__name__}
({self.chapters}
, {self.ranges}
)'
3305 def parse_dfxp_time_expr(time_expr):
3309 mobj = re.match(rf'^
(?P
<time_offset
>{NUMBER_RE}
)s?$
', time_expr)
3311 return float(mobj.group('time_offset
'))
3313 mobj = re.match(r'^
(\d
+):(\d\d
):(\d\
d(?
:(?
:\
.|
:)\d
+)?
)$
', time_expr)
3315 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format `seconds` as an SRT timecode, using the fields of timetuple_from_msec()"""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
def ass_subtitles_timecode(seconds):
    """Format `seconds` as an ASS timecode, using the fields of timetuple_from_msec()"""
    timetuple = timetuple_from_msec(seconds * 1000)
    # The final field is written as hundredths of a second; "%02d" truncates the fraction
    centiseconds = timetuple.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*timetuple[:-1], centiseconds)
3327 def dfxp2srt(dfxp_data):
3329 @param dfxp_data A bytes-like object containing DFXP data
3330 @returns A unicode object containing converted SRT data
3332 LEGACY_NAMESPACES = (
3333 (b'http
://www
.w3
.org
/ns
/ttml
', [
3334 b'http
://www
.w3
.org
/2004/11/ttaf1
',
3335 b'http
://www
.w3
.org
/2006/04/ttaf1
',
3336 b'http
://www
.w3
.org
/2006/10/ttaf1
',
3338 (b'http
://www
.w3
.org
/ns
/ttml
#styling', [
3339 b
'http://www.w3.org/ns/ttml#style',
3343 SUPPORTED_STYLING
= [
3352 _x
= functools
.partial(xpath_with_ns
, ns_map
={
3353 'xml': 'http://www.w3.org/XML/1998/namespace',
3354 'ttml': 'http://www.w3.org/ns/ttml',
3355 'tts': 'http://www.w3.org/ns/ttml#styling',
3361 class TTMLPElementParser
:
3363 _unclosed_elements
= []
3364 _applied_styles
= []
3366 def start(self
, tag
, attrib
):
3367 if tag
in (_x('ttml:br'), 'br'):
3370 unclosed_elements
= []
3372 element_style_id
= attrib
.get('style')
3374 style
.update(default_style
)
3375 if element_style_id
:
3376 style
.update(styles
.get(element_style_id
, {}))
3377 for prop
in SUPPORTED_STYLING
:
3378 prop_val
= attrib
.get(_x('tts:' + prop
))
3380 style
[prop
] = prop_val
3383 for k
, v
in sorted(style
.items()):
3384 if self
._applied
_styles
and self
._applied
_styles
[-1].get(k
) == v
:
3387 font
+= ' color="%s"' % v
3388 elif k
== 'fontSize':
3389 font
+= ' size="%s"' % v
3390 elif k
== 'fontFamily':
3391 font
+= ' face="%s"' % v
3392 elif k
== 'fontWeight' and v
== 'bold':
3394 unclosed_elements
.append('b')
3395 elif k
== 'fontStyle' and v
== 'italic':
3397 unclosed_elements
.append('i')
3398 elif k
== 'textDecoration' and v
== 'underline':
3400 unclosed_elements
.append('u')
3402 self
._out
+= '<font' + font
+ '>'
3403 unclosed_elements
.append('font')
3405 if self
._applied
_styles
:
3406 applied_style
.update(self
._applied
_styles
[-1])
3407 applied_style
.update(style
)
3408 self
._applied
_styles
.append(applied_style
)
3409 self
._unclosed
_elements
.append(unclosed_elements
)
3412 if tag
not in (_x('ttml:br'), 'br'):
3413 unclosed_elements
= self
._unclosed
_elements
.pop()
3414 for element
in reversed(unclosed_elements
):
3415 self
._out
+= '</%s>' % element
3416 if unclosed_elements
and self
._applied
_styles
:
3417 self
._applied
_styles
.pop()
3419 def data(self
, data
):
3423 return self
._out
.strip()
3425 # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
3426 # This will not trigger false positives since only UTF-8 text is being replaced
3427 dfxp_data
= dfxp_data
.replace(b
'encoding=\'UTF-16\'', b
'encoding=\'UTF-8\'')
3429 def parse_node(node
):
3430 target
= TTMLPElementParser()
3431 parser
= xml
.etree
.ElementTree
.XMLParser(target
=target
)
3432 parser
.feed(xml
.etree
.ElementTree
.tostring(node
))
3433 return parser
.close()
3435 for k
, v
in LEGACY_NAMESPACES
:
3437 dfxp_data
= dfxp_data
.replace(ns
, k
)
3439 dfxp
= compat_etree_fromstring(dfxp_data
)
3441 paras
= dfxp
.findall(_x('.//ttml:p')) or dfxp
.findall('.//p')
3444 raise ValueError('Invalid dfxp/TTML subtitle')
3448 for style
in dfxp
.findall(_x('.//ttml:style')):
3449 style_id
= style
.get('id') or style
.get(_x('xml:id'))
3452 parent_style_id
= style
.get('style')
3454 if parent_style_id
not in styles
:
3457 styles
[style_id
] = styles
[parent_style_id
].copy()
3458 for prop
in SUPPORTED_STYLING
:
3459 prop_val
= style
.get(_x('tts:' + prop
))
3461 styles
.setdefault(style_id
, {})[prop
] = prop_val
3467 for p
in ('body', 'div'):
3468 ele
= xpath_element(dfxp
, [_x('.//ttml:' + p
), './/' + p
])
3471 style
= styles
.get(ele
.get('style'))
3474 default_style
.update(style
)
3476 for para
, index
in zip(paras
, itertools
.count(1)):
3477 begin_time
= parse_dfxp_time_expr(para
.attrib
.get('begin'))
3478 end_time
= parse_dfxp_time_expr(para
.attrib
.get('end'))
3479 dur
= parse_dfxp_time_expr(para
.attrib
.get('dur'))
3480 if begin_time
is None:
3485 end_time
= begin_time
+ dur
3486 out
.append('%d\n%s --> %s\n%s\n\n' % (
3488 srt_subtitles_timecode(begin_time
),
3489 srt_subtitles_timecode(end_time
),
def cli_option(params, command_option, param, separator=None):
    """
    Build the command-line fragment for a single valued option

    @param params           mapping the option value is looked up in
    @param command_option   the flag to emit (e.g. '--proxy')
    @param param            key of the value inside `params`
    @param separator        if given, flag and value are joined into one argument
    @returns                [] when the value is None, otherwise a list of arguments
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """
    Build the command-line fragment for a boolean option

    The boolean found under `param` is translated to `true_value`/`false_value`
    and then formatted by cli_option(); a missing (None) value yields [].
    """
    flag = params.get(param)
    assert flag in (True, False, None)
    # Re-use cli_option by looking the flag itself up in a bool -> text mapping
    rendered = {True: true_value, False: false_value}
    return cli_option(rendered, command_option, flag, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if `params[param]` equals `expected_value`, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
3512 def cli_configuration_args(argdict
, keys
, default
=[], use_compat
=True):
3513 if isinstance(argdict
, (list, tuple)): # for backward compatibility
3520 assert isinstance(argdict
, dict)
3522 assert isinstance(keys
, (list, tuple))
3523 for key_list
in keys
:
3524 arg_list
= list(filter(
3525 lambda x
: x
is not None,
3526 [argdict
.get(key
.lower()) for key
in variadic(key_list
)]))
3528 return [arg
for args
in arg_list
for arg
in args
]
3532 def _configuration_args(main_key
, argdict
, exe
, keys
=None, default
=[], use_compat
=True):
3533 main_key
, exe
= main_key
.lower(), exe
.lower()
3534 root_key
= exe
if main_key
== exe
else f
'{main_key}+{exe}'
3535 keys
= [f
'{root_key}{k}' for k
in (keys
or [''])]
3536 if root_key
in keys
:
3538 keys
.append((main_key
, exe
))
3539 keys
.append('default')
3542 return cli_configuration_args(argdict
, keys
, default
, use_compat
)
3546 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3605 'iw': 'heb', # Replaced by he in 1989 revision
3615 'in': 'ind', # Replaced by id in 1989 revision
3731 'ji': 'yid', # Replaced by yi in 1989 revision
3739 def short2long(cls
, code
):
3740 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3741 return cls
._lang
_map
.get(code
[:2])
3744 def long2short(cls
, code
):
3745 """Convert language code from ISO 639-2/T to ISO 639-1"""
3746 for short_name
, long_name
in cls
._lang
_map
.items():
3747 if long_name
== code
:
3752 # From http://data.okfn.org/data/core/country-list
3754 'AF': 'Afghanistan',
3755 'AX': 'Åland Islands',
3758 'AS': 'American Samoa',
3763 'AG': 'Antigua and Barbuda',
3780 'BO': 'Bolivia, Plurinational State of',
3781 'BQ': 'Bonaire, Sint Eustatius and Saba',
3782 'BA': 'Bosnia and Herzegovina',
3784 'BV': 'Bouvet Island',
3786 'IO': 'British Indian Ocean Territory',
3787 'BN': 'Brunei Darussalam',
3789 'BF': 'Burkina Faso',
3795 'KY': 'Cayman Islands',
3796 'CF': 'Central African Republic',
3800 'CX': 'Christmas Island',
3801 'CC': 'Cocos (Keeling) Islands',
3805 'CD': 'Congo, the Democratic Republic of the',
3806 'CK': 'Cook Islands',
3808 'CI': 'Côte d\'Ivoire',
3813 'CZ': 'Czech Republic',
3817 'DO': 'Dominican Republic',
3820 'SV': 'El Salvador',
3821 'GQ': 'Equatorial Guinea',
3825 'FK': 'Falkland Islands (Malvinas)',
3826 'FO': 'Faroe Islands',
3830 'GF': 'French Guiana',
3831 'PF': 'French Polynesia',
3832 'TF': 'French Southern Territories',
3847 'GW': 'Guinea-Bissau',
3850 'HM': 'Heard Island and McDonald Islands',
3851 'VA': 'Holy See (Vatican City State)',
3858 'IR': 'Iran, Islamic Republic of',
3861 'IM': 'Isle of Man',
3871 'KP': 'Korea, Democratic People\'s Republic of',
3872 'KR': 'Korea, Republic of',
3875 'LA': 'Lao People\'s Democratic Republic',
3881 'LI': 'Liechtenstein',
3885 'MK': 'Macedonia, the Former Yugoslav Republic of',
3892 'MH': 'Marshall Islands',
3898 'FM': 'Micronesia, Federated States of',
3899 'MD': 'Moldova, Republic of',
3910 'NL': 'Netherlands',
3911 'NC': 'New Caledonia',
3912 'NZ': 'New Zealand',
3917 'NF': 'Norfolk Island',
3918 'MP': 'Northern Mariana Islands',
3923 'PS': 'Palestine, State of',
3925 'PG': 'Papua New Guinea',
3928 'PH': 'Philippines',
3932 'PR': 'Puerto Rico',
3936 'RU': 'Russian Federation',
3938 'BL': 'Saint Barthélemy',
3939 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
3940 'KN': 'Saint Kitts and Nevis',
3941 'LC': 'Saint Lucia',
3942 'MF': 'Saint Martin (French part)',
3943 'PM': 'Saint Pierre and Miquelon',
3944 'VC': 'Saint Vincent and the Grenadines',
3947 'ST': 'Sao Tome and Principe',
3948 'SA': 'Saudi Arabia',
3952 'SL': 'Sierra Leone',
3954 'SX': 'Sint Maarten (Dutch part)',
3957 'SB': 'Solomon Islands',
3959 'ZA': 'South Africa',
3960 'GS': 'South Georgia and the South Sandwich Islands',
3961 'SS': 'South Sudan',
3966 'SJ': 'Svalbard and Jan Mayen',
3969 'CH': 'Switzerland',
3970 'SY': 'Syrian Arab Republic',
3971 'TW': 'Taiwan, Province of China',
3973 'TZ': 'Tanzania, United Republic of',
3975 'TL': 'Timor-Leste',
3979 'TT': 'Trinidad and Tobago',
3982 'TM': 'Turkmenistan',
3983 'TC': 'Turks and Caicos Islands',
3987 'AE': 'United Arab Emirates',
3988 'GB': 'United Kingdom',
3989 'US': 'United States',
3990 'UM': 'United States Minor Outlying Islands',
3994 'VE': 'Venezuela, Bolivarian Republic of',
3996 'VG': 'Virgin Islands, British',
3997 'VI': 'Virgin Islands, U.S.',
3998 'WF': 'Wallis and Futuna',
3999 'EH': 'Western Sahara',
4003 # Not ISO 3166 codes, but used for IP blocks
4004 'AP': 'Asia/Pacific Region',
4009 def short2full(cls
, code
):
4010 """Convert an ISO 3166-2 country code to the corresponding full name"""
4011 return cls
._country
_map
.get(code
.upper())
4015 # Major IPv4 address blocks per country
4017 'AD': '46.172.224.0/19',
4018 'AE': '94.200.0.0/13',
4019 'AF': '149.54.0.0/17',
4020 'AG': '209.59.64.0/18',
4021 'AI': '204.14.248.0/21',
4022 'AL': '46.99.0.0/16',
4023 'AM': '46.70.0.0/15',
4024 'AO': '105.168.0.0/13',
4025 'AP': '182.50.184.0/21',
4026 'AQ': '23.154.160.0/24',
4027 'AR': '181.0.0.0/12',
4028 'AS': '202.70.112.0/20',
4029 'AT': '77.116.0.0/14',
4030 'AU': '1.128.0.0/11',
4031 'AW': '181.41.0.0/18',
4032 'AX': '185.217.4.0/22',
4033 'AZ': '5.197.0.0/16',
4034 'BA': '31.176.128.0/17',
4035 'BB': '65.48.128.0/17',
4036 'BD': '114.130.0.0/16',
4038 'BF': '102.178.0.0/15',
4039 'BG': '95.42.0.0/15',
4040 'BH': '37.131.0.0/17',
4041 'BI': '154.117.192.0/18',
4042 'BJ': '137.255.0.0/16',
4043 'BL': '185.212.72.0/23',
4044 'BM': '196.12.64.0/18',
4045 'BN': '156.31.0.0/16',
4046 'BO': '161.56.0.0/16',
4047 'BQ': '161.0.80.0/20',
4048 'BR': '191.128.0.0/12',
4049 'BS': '24.51.64.0/18',
4050 'BT': '119.2.96.0/19',
4051 'BW': '168.167.0.0/16',
4052 'BY': '178.120.0.0/13',
4053 'BZ': '179.42.192.0/18',
4054 'CA': '99.224.0.0/11',
4055 'CD': '41.243.0.0/16',
4056 'CF': '197.242.176.0/21',
4057 'CG': '160.113.0.0/16',
4058 'CH': '85.0.0.0/13',
4059 'CI': '102.136.0.0/14',
4060 'CK': '202.65.32.0/19',
4061 'CL': '152.172.0.0/14',
4062 'CM': '102.244.0.0/14',
4063 'CN': '36.128.0.0/10',
4064 'CO': '181.240.0.0/12',
4065 'CR': '201.192.0.0/12',
4066 'CU': '152.206.0.0/15',
4067 'CV': '165.90.96.0/19',
4068 'CW': '190.88.128.0/17',
4069 'CY': '31.153.0.0/16',
4070 'CZ': '88.100.0.0/14',
4072 'DJ': '197.241.0.0/17',
4073 'DK': '87.48.0.0/12',
4074 'DM': '192.243.48.0/20',
4075 'DO': '152.166.0.0/15',
4076 'DZ': '41.96.0.0/12',
4077 'EC': '186.68.0.0/15',
4078 'EE': '90.190.0.0/15',
4079 'EG': '156.160.0.0/11',
4080 'ER': '196.200.96.0/20',
4081 'ES': '88.0.0.0/11',
4082 'ET': '196.188.0.0/14',
4083 'EU': '2.16.0.0/13',
4084 'FI': '91.152.0.0/13',
4085 'FJ': '144.120.0.0/16',
4086 'FK': '80.73.208.0/21',
4087 'FM': '119.252.112.0/20',
4088 'FO': '88.85.32.0/19',
4090 'GA': '41.158.0.0/15',
4092 'GD': '74.122.88.0/21',
4093 'GE': '31.146.0.0/16',
4094 'GF': '161.22.64.0/18',
4095 'GG': '62.68.160.0/19',
4096 'GH': '154.160.0.0/12',
4097 'GI': '95.164.0.0/16',
4098 'GL': '88.83.0.0/19',
4099 'GM': '160.182.0.0/15',
4100 'GN': '197.149.192.0/18',
4101 'GP': '104.250.0.0/19',
4102 'GQ': '105.235.224.0/20',
4103 'GR': '94.64.0.0/13',
4104 'GT': '168.234.0.0/16',
4105 'GU': '168.123.0.0/16',
4106 'GW': '197.214.80.0/20',
4107 'GY': '181.41.64.0/18',
4108 'HK': '113.252.0.0/14',
4109 'HN': '181.210.0.0/16',
4110 'HR': '93.136.0.0/13',
4111 'HT': '148.102.128.0/17',
4112 'HU': '84.0.0.0/14',
4113 'ID': '39.192.0.0/10',
4114 'IE': '87.32.0.0/12',
4115 'IL': '79.176.0.0/13',
4116 'IM': '5.62.80.0/20',
4117 'IN': '117.192.0.0/10',
4118 'IO': '203.83.48.0/21',
4119 'IQ': '37.236.0.0/14',
4120 'IR': '2.176.0.0/12',
4121 'IS': '82.221.0.0/16',
4122 'IT': '79.0.0.0/10',
4123 'JE': '87.244.64.0/18',
4124 'JM': '72.27.0.0/17',
4125 'JO': '176.29.0.0/16',
4126 'JP': '133.0.0.0/8',
4127 'KE': '105.48.0.0/12',
4128 'KG': '158.181.128.0/17',
4129 'KH': '36.37.128.0/17',
4130 'KI': '103.25.140.0/22',
4131 'KM': '197.255.224.0/20',
4132 'KN': '198.167.192.0/19',
4133 'KP': '175.45.176.0/22',
4134 'KR': '175.192.0.0/10',
4135 'KW': '37.36.0.0/14',
4136 'KY': '64.96.0.0/15',
4137 'KZ': '2.72.0.0/13',
4138 'LA': '115.84.64.0/18',
4139 'LB': '178.135.0.0/16',
4140 'LC': '24.92.144.0/20',
4141 'LI': '82.117.0.0/19',
4142 'LK': '112.134.0.0/15',
4143 'LR': '102.183.0.0/16',
4144 'LS': '129.232.0.0/17',
4145 'LT': '78.56.0.0/13',
4146 'LU': '188.42.0.0/16',
4147 'LV': '46.109.0.0/16',
4148 'LY': '41.252.0.0/14',
4149 'MA': '105.128.0.0/11',
4150 'MC': '88.209.64.0/18',
4151 'MD': '37.246.0.0/16',
4152 'ME': '178.175.0.0/17',
4153 'MF': '74.112.232.0/21',
4154 'MG': '154.126.0.0/17',
4155 'MH': '117.103.88.0/21',
4156 'MK': '77.28.0.0/15',
4157 'ML': '154.118.128.0/18',
4158 'MM': '37.111.0.0/17',
4159 'MN': '49.0.128.0/17',
4160 'MO': '60.246.0.0/16',
4161 'MP': '202.88.64.0/20',
4162 'MQ': '109.203.224.0/19',
4163 'MR': '41.188.64.0/18',
4164 'MS': '208.90.112.0/22',
4165 'MT': '46.11.0.0/16',
4166 'MU': '105.16.0.0/12',
4167 'MV': '27.114.128.0/18',
4168 'MW': '102.70.0.0/15',
4169 'MX': '187.192.0.0/11',
4170 'MY': '175.136.0.0/13',
4171 'MZ': '197.218.0.0/15',
4172 'NA': '41.182.0.0/16',
4173 'NC': '101.101.0.0/18',
4174 'NE': '197.214.0.0/18',
4175 'NF': '203.17.240.0/22',
4176 'NG': '105.112.0.0/12',
4177 'NI': '186.76.0.0/15',
4178 'NL': '145.96.0.0/11',
4179 'NO': '84.208.0.0/13',
4180 'NP': '36.252.0.0/15',
4181 'NR': '203.98.224.0/19',
4182 'NU': '49.156.48.0/22',
4183 'NZ': '49.224.0.0/14',
4184 'OM': '5.36.0.0/15',
4185 'PA': '186.72.0.0/15',
4186 'PE': '186.160.0.0/14',
4187 'PF': '123.50.64.0/18',
4188 'PG': '124.240.192.0/19',
4189 'PH': '49.144.0.0/13',
4190 'PK': '39.32.0.0/11',
4191 'PL': '83.0.0.0/11',
4192 'PM': '70.36.0.0/20',
4193 'PR': '66.50.0.0/16',
4194 'PS': '188.161.0.0/16',
4195 'PT': '85.240.0.0/13',
4196 'PW': '202.124.224.0/20',
4197 'PY': '181.120.0.0/14',
4198 'QA': '37.210.0.0/15',
4199 'RE': '102.35.0.0/16',
4200 'RO': '79.112.0.0/13',
4201 'RS': '93.86.0.0/15',
4202 'RU': '5.136.0.0/13',
4203 'RW': '41.186.0.0/16',
4204 'SA': '188.48.0.0/13',
4205 'SB': '202.1.160.0/19',
4206 'SC': '154.192.0.0/11',
4207 'SD': '102.120.0.0/13',
4208 'SE': '78.64.0.0/12',
4209 'SG': '8.128.0.0/10',
4210 'SI': '188.196.0.0/14',
4211 'SK': '78.98.0.0/15',
4212 'SL': '102.143.0.0/17',
4213 'SM': '89.186.32.0/19',
4214 'SN': '41.82.0.0/15',
4215 'SO': '154.115.192.0/18',
4216 'SR': '186.179.128.0/17',
4217 'SS': '105.235.208.0/21',
4218 'ST': '197.159.160.0/19',
4219 'SV': '168.243.0.0/16',
4220 'SX': '190.102.0.0/20',
4222 'SZ': '41.84.224.0/19',
4223 'TC': '65.255.48.0/20',
4224 'TD': '154.68.128.0/19',
4225 'TG': '196.168.0.0/14',
4226 'TH': '171.96.0.0/13',
4227 'TJ': '85.9.128.0/18',
4228 'TK': '27.96.24.0/21',
4229 'TL': '180.189.160.0/20',
4230 'TM': '95.85.96.0/19',
4231 'TN': '197.0.0.0/11',
4232 'TO': '175.176.144.0/21',
4233 'TR': '78.160.0.0/11',
4234 'TT': '186.44.0.0/15',
4235 'TV': '202.2.96.0/19',
4236 'TW': '120.96.0.0/11',
4237 'TZ': '156.156.0.0/14',
4238 'UA': '37.52.0.0/14',
4239 'UG': '102.80.0.0/13',
4241 'UY': '167.56.0.0/13',
4242 'UZ': '84.54.64.0/18',
4243 'VA': '212.77.0.0/19',
4244 'VC': '207.191.240.0/21',
4245 'VE': '186.88.0.0/13',
4246 'VG': '66.81.192.0/20',
4247 'VI': '146.226.0.0/16',
4248 'VN': '14.160.0.0/11',
4249 'VU': '202.80.32.0/20',
4250 'WF': '117.20.32.0/21',
4251 'WS': '202.4.32.0/19',
4252 'YE': '134.35.0.0/16',
4253 'YT': '41.242.116.0/22',
4254 'ZA': '41.0.0.0/11',
4255 'ZM': '102.144.0.0/13',
4256 'ZW': '102.177.192.0/18',
4260 def random_ipv4(cls
, code_or_block
):
4261 if len(code_or_block
) == 2:
4262 block
= cls
._country
_ip
_map
.get(code_or_block
.upper())
4266 block
= code_or_block
4267 addr
, preflen
= block
.split('/')
4268 addr_min
= struct
.unpack('!L', socket
.inet_aton(addr
))[0]
4269 addr_max
= addr_min |
(0xffffffff >> int(preflen
))
4270 return str(socket
.inet_ntoa(
4271 struct
.pack('!L', random
.randint(addr_min
, addr_max
))))
4274 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4275 # released into Public Domain
4276 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4278 def long_to_bytes(n
, blocksize
=0):
4279 """long_to_bytes(n:long, blocksize:int) : string
4280 Convert a long integer to a byte string.
4282 If optional blocksize is given and greater than zero, pad the front of the
4283 byte string with binary zeros so that the length is a multiple of
4286 # after much testing, this algorithm was deemed to be the fastest
4290 s
= struct
.pack('>I', n
& 0xffffffff) + s
4292 # strip off leading zeros
4293 for i
in range(len(s
)):
4294 if s
[i
] != b
'\000'[0]:
4297 # only happens when n == 0
4301 # add back some pad bytes. this could be done more efficiently w.r.t. the
4302 # de-padding being done above, but sigh...
4303 if blocksize
> 0 and len(s
) % blocksize
:
4304 s
= (blocksize
- len(s
) % blocksize
) * b
'\000' + s
4308 def bytes_to_long(s
):
4309 """bytes_to_long(string) : long
4310 Convert a byte string to a long integer.
4312 This is (essentially) the inverse of long_to_bytes().
4317 extra
= (4 - length
% 4)
4318 s
= b
'\000' * extra
+ s
4319 length
= length
+ extra
4320 for i
in range(0, length
, 4):
4321 acc
= (acc
<< 32) + struct
.unpack('>I', s
[i
:i
+ 4])[0]
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The byte order is reversed before the data is read as one big integer
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    return format(pow(plaintext, exponent, modulus), 'x')
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if fewer than 8 padding bytes would fit
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding string must consist of nonzero bytes only: the first zero
    # after the [0, 2] prefix marks where the payload starts
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2, *pseudo_random, 0, *data]
4357 def _base_n_table(n
, table
):
4358 if not table
and not n
:
4359 raise ValueError('Either table or n must be specified')
4360 table
= (table
or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n
]
4362 if n
and n
!= len(table
):
4363 raise ValueError(f
'base {n} exceeds table length {len(table)}')
4367 def encode_base_n(num
, n
=None, table
=None):
4368 """Convert given int to a base-n string"""
4369 table
= _base_n_table(n
, table
)
4373 result
, base
= '', len(table
)
4375 result
= table
[num
% base
] + result
4380 def decode_base_n(string
, n
=None, table
=None):
4381 """Convert given base-n string to int"""
4382 table
= {char: index for index, char in enumerate(_base_n_table(n, table))}
4383 result
, base
= 0, len(table
)
4385 result
= result
* base
+ table
[char
]
4389 def decode_packed_codes(code
):
4390 mobj
= re
.search(PACKED_CODES_RE
, code
)
4391 obfuscated_code
, base
, count
, symbols
= mobj
.groups()
4394 symbols
= symbols
.split('|')
4399 base_n_count
= encode_base_n(count
, base
)
4400 symbol_table
[base_n_count
] = symbols
[count
] or base_n_count
4403 r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)],
def caesar(s, alphabet, shift):
    """
    Apply a Caesar cipher to `s`

    Every character found in `alphabet` is replaced by the one `shift`
    positions further (wrapping around); other characters are kept as-is.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(rotate, s))
4417 return caesar(s
, r
'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """
    Parse an m3u8 attribute list (`KEY=value,KEY="quoted value",...`) into a dict

    Surrounding double quotes of quoted values are removed.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs
    }
def urshift(val, n):
    """
    Shift `val` right by `n` bits without sign extension

    A negative `val` is first offset by 2**32 before shifting.
    """
    unsigned = val if val >= 0 else val + 0x100000000
    return unsigned >> n
4433 def write_xattr(path
, key
, value
):
4434 # Windows: Write xattrs to NTFS Alternate Data Streams:
4435 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4436 if compat_os_name
== 'nt':
4437 assert ':' not in key
4438 assert os
.path
.exists(path
)
4441 with open(f
'{path}:{key}', 'wb') as f
:
4443 except OSError as e
:
4444 raise XAttrMetadataError(e
.errno
, e
.strerror
)
4447 # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
4450 if callable(getattr(os
, 'setxattr', None)):
4451 setxattr
= os
.setxattr
4452 elif getattr(xattr
, '_yt_dlp__identifier', None) == 'pyxattr':
4453 # Unicode arguments are not supported in pyxattr until version 0.5.0
4454 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4455 if version_tuple(xattr
.__version
__) >= (0, 5, 0):
4456 setxattr
= xattr
.set
4458 setxattr
= xattr
.setxattr
4462 setxattr(path
, key
, value
)
4463 except OSError as e
:
4464 raise XAttrMetadataError(e
.errno
, e
.strerror
)
4467 # UNIX Method 2. Use setfattr/xattr executables
4468 exe
= ('setfattr' if check_executable('setfattr', ['--version'])
4469 else 'xattr' if check_executable('xattr', ['-h']) else None)
4471 raise XAttrUnavailableError(
4472 'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
4473 + ('"xattr" binary' if sys
.platform
!= 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4475 value
= value
.decode()
4477 _
, stderr
, returncode
= Popen
.run(
4478 [exe
, '-w', key
, value
, path
] if exe
== 'xattr' else [exe
, '-n', key
, '-v', value
, path
],
4479 text
=True, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
4480 except OSError as e
:
4481 raise XAttrMetadataError(e
.errno
, e
.strerror
)
4483 raise XAttrMetadataError(returncode
, stderr
)
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (inclusive)

    @returns    dict mapping the three given field names to the year, month
                and day of that date, each as a string without zero padding
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    picked = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    values = (picked.year, picked.month, picked.day)
    return {name: str(value) for name, value in zip(field_names, values)}
def find_available_port(interface=''):
    """
    Ask the OS for a currently free port on `interface`

    @returns    the port number, or None if the socket could not be created/bound
    """
    try:
        with socket.socket() as probe:
            # Binding to port 0 lets the OS choose any unused port
            probe.bind((interface, 0))
            port = probe.getsockname()[1]
    except OSError:
        return None
    return port
4507 # Templates for internet shortcut files, which are plain text files.
4508 DOT_URL_LINK_TEMPLATE
= '''\
4513 DOT_WEBLOC_LINK_TEMPLATE
= '''\
4514 <?xml version="1.0" encoding="UTF-8"?>
4515 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4516 <plist version="1.0">
4519 \t<string>%(url)s</string>
4524 DOT_DESKTOP_LINK_TEMPLATE
= '''\
4534 'url': DOT_URL_LINK_TEMPLATE
,
4535 'desktop': DOT_DESKTOP_LINK_TEMPLATE
,
4536 'webloc': DOT_WEBLOC_LINK_TEMPLATE
,
4540 def iri_to_uri(iri
):
4542 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4544 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
4547 iri_parts
= urllib
.parse
.urlparse(iri
)
4549 if '[' in iri_parts
.netloc
:
4550 raise ValueError('IPv6 URIs are not, yet, supported.')
4551 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4553 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4556 if iri_parts
.username
:
4557 net_location
+= urllib
.parse
.quote(iri_parts
.username
, safe
=r
"!$%&'()*+,~")
4558 if iri_parts
.password
is not None:
4559 net_location
+= ':' + urllib
.parse
.quote(iri_parts
.password
, safe
=r
"!$%&'()*+,~")
4562 net_location
+= iri_parts
.hostname
.encode('idna').decode() # Punycode for Unicode hostnames.
4563 # The 'idna' encoding produces ASCII text.
4564 if iri_parts
.port
is not None and iri_parts
.port
!= 80:
4565 net_location
+= ':' + str(iri_parts
.port
)
4567 return urllib
.parse
.urlunparse(
4571 urllib
.parse
.quote_plus(iri_parts
.path
, safe
=r
"!$%&'()*+,/:;=@|~"),
4573 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4574 urllib
.parse
.quote_plus(iri_parts
.params
, safe
=r
"!$%&'()*+,/:;=@|~"),
4576 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4577 urllib
.parse
.quote_plus(iri_parts
.query
, safe
=r
"!$%&'()*+,/:;=?@{|}~"),
4579 urllib
.parse
.quote_plus(iri_parts
.fragment
, safe
=r
"!#$%&'()*+,/:;=?@{|}~")))
4581 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """Return `path` in its `\\\\?\\`-prefixed absolute form on Windows/Cygwin; unchanged elsewhere."""
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)
4592 def format_field(obj
, field
=None, template
='%s', ignore
=NO_DEFAULT
, default
='', func
=IDENTITY
):
4593 val
= traversal
.traverse_obj(obj
, *variadic(field
))
4594 if not val
if ignore
is NO_DEFAULT
else val
in variadic(ignore
):
4596 return template
% func(val
)
4599 def clean_podcast_url(url
):
4600 url
= re
.sub(r
'''(?x)
4604 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4609 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4612 cn\.co| # https://podcorn.com/analytics-prefix/
4613 st\.fm # https://podsights.com/docs/
4618 return re
.sub(r
'^\w+://(\w+://)', r
'\1', url
)
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Generate a random version 4 UUID

    @returns    lowercase string of the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
                where every `x` is a random hex digit and `y` is one of 8, 9, a, b
    """
    def fill(mobj):
        if mobj.group(0) == 'y':
            # The two top bits of this digit are the variant field and must be `10`
            return random.choice('89ab')
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', fill, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """
    Create the parent directory of `path` (including intermediate ones)

    @param path         path of the file whose directory should exist
    @param to_screen    optional callable that receives an error message on failure
    @returns            True on success (or if `path` has no directory part),
                        False if the directory could not be created
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # `callable(...)` returns a bool, so comparing it against None was always
        # true and the default `to_screen=None` got called, raising TypeError
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
def get_executable_path():
    """Return the absolute directory of the executable path reported by the update module."""
    # Imported here rather than at module level
    from ..update import _get_variant_and_executable_path

    executable = _get_variant_and_executable_path()[1]
    return os.path.dirname(os.path.abspath(executable))
4646 def get_user_config_dirs(package_name
):
4647 # .config (e.g. ~/.config/package_name)
4648 xdg_config_home
= os
.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
4649 yield os
.path
.join(xdg_config_home
, package_name
)
4651 # appdata (%APPDATA%/package_name)
4652 appdata_dir
= os
.getenv('appdata')
4654 yield os
.path
.join(appdata_dir
, package_name
)
4656 # home (~/.package_name)
4657 yield os
.path
.join(compat_expanduser('~'), f
'.{package_name}')
def get_system_config_dirs(package_name):
    """Yield the system-wide config directory, i.e. /etc/<package_name>"""
    system_dir = os.path.join('/etc', package_name)
    yield system_dir
def time_seconds(**kwargs):
    """
    Return the current time in seconds since the epoch (1970-01-01T00:00:00Z),
    shifted by the offset that the keyword arguments describe

    @param kwargs   Passed through to datetime.timedelta (e.g. hours=9)
    """
    now = time.time()
    offset = datetime.timedelta(**kwargs)
    return now + offset.total_seconds()
4672 # create a JSON Web Signature (jws) with HS256 algorithm
4673 # the resulting format is in JWS Compact Serialization
4674 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
4675 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
4676 def jwt_encode_hs256(payload_data
, key
, headers
={}):
4682 header_data
.update(headers
)
4683 header_b64
= base64
.b64encode(json
.dumps(header_data
).encode())
4684 payload_b64
= base64
.b64encode(json
.dumps(payload_data
).encode())
4685 h
= hmac
.new(key
.encode(), header_b64
+ b
'.' + payload_b64
, hashlib
.sha256
)
4686 signature_b64
= base64
.b64encode(h
.digest())
4687 token
= header_b64
+ b
'.' + payload_b64
+ b
'.' + signature_b64
4691 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
4692 def jwt_decode_hs256(jwt
):
4693 header_b64
, payload_b64
, signature_b64
= jwt
.split('.')
4694 # add trailing ='s that may have been stripped, superfluous ='s are ignored
4695 payload_data
= json
.loads(base64
.urlsafe_b64decode(f
'{payload_b64}==='))
4699 WINDOWS_VT_MODE
= False if compat_os_name
== 'nt' else None
4703 def supports_terminal_sequences(stream
):
4704 if compat_os_name
== 'nt':
4705 if not WINDOWS_VT_MODE
:
4707 elif not os
.getenv('TERM'):
4710 return stream
.isatty()
4711 except BaseException
:
4715 def windows_enable_vt_mode():
4716 """Ref: https://bugs.python.org/issue30075 """
4717 if get_windows_version() < (10, 0, 10586):
4721 import ctypes
.wintypes
4724 ENABLE_VIRTUAL_TERMINAL_PROCESSING
= 0x0004
4726 dll
= ctypes
.WinDLL('kernel32', use_last_error
=False)
4727 handle
= os
.open('CONOUT$', os
.O_RDWR
)
4729 h_out
= ctypes
.wintypes
.HANDLE(msvcrt
.get_osfhandle(handle
))
4730 dw_original_mode
= ctypes
.wintypes
.DWORD()
4731 success
= dll
.GetConsoleMode(h_out
, ctypes
.byref(dw_original_mode
))
4733 raise Exception('GetConsoleMode failed')
4735 success
= dll
.SetConsoleMode(h_out
, ctypes
.wintypes
.DWORD(
4736 dw_original_mode
.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING
))
4738 raise Exception('SetConsoleMode failed')
4742 global WINDOWS_VT_MODE
4743 WINDOWS_VT_MODE
= True
4744 supports_terminal_sequences
.cache_clear()
# Matches an ESC "[" ... "m" sequence (one or more non-"m" characters in between)
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with every match of _terminal_sequences_re removed"""
    return re.sub(_terminal_sequences_re, '', string)
def number_of_digits(number):
    """Return the length of `number` rendered with the `%d` format (a minus sign counts)"""
    rendered = '%d' % number
    return len(rendered)
def join_nonempty(*values, delim='-', from_dict=None):
    """
    Join the truthy values into a single string

    @param values       Items to join; falsy items are dropped and the rest
                        are converted with str()
    @param delim        Separator placed between the kept items
    @param from_dict    If given, each value is instead used as a path that
                        is looked up in this object via traverse_obj
    """
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(key)) for key in values)
    return delim.join(str(item) for item in values if item)
4764 def scale_thumbnails_to_max_format_width(formats
, thumbnails
, url_width_re
):
4766 Find the largest format dimensions in terms of video width and, for each thumbnail:
4767 * Modify the URL: Match the width with the provided regex and replace with the former width
4770 This function is useful with video services that scale the provided thumbnails on demand
4772 _keys
= ('width', 'height')
4773 max_dimensions
= max(
4774 (tuple(format
.get(k
) or 0 for k
in _keys
) for format
in formats
),
4776 if not max_dimensions
[0]:
4780 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}
,
4781 dict(zip(_keys
, max_dimensions
)), thumbnail
)
4782 for thumbnail
in thumbnails
4786 def parse_http_range(range):
4787 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
4789 return None, None, None
4790 crg
= re
.search(r
'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
4792 return None, None, None
4793 return int(crg
.group(1)), int_or_none(crg
.group(2)), int_or_none(crg
.group(3))
4796 def read_stdin(what
):
4798 eof
= 'Ctrl+Z' if compat_os_name
== 'nt' else 'Ctrl+D'
4799 write_string(f
'Reading {what} from STDIN - EOF ({eof}) to end:\n')
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @returns (encoding, bytes to skip)
    """

    # A matching BOM wins over any coding declaration
    from_bom = next(((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if from_bom is not None:
        return from_bom

    # Drop all null bytes so that the declaration also matches in UTF-16/UTF-32 data.
    # Endianness is ignored; this is only meant to be a good enough match
    without_nulls = data.replace(b'\0', b'')
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', without_nulls)
    if declaration is None:
        return None, 0
    return declaration.group(1).decode(), 0
4825 __initialized
= False
4827 def __init__(self
, parser
, label
=None):
4828 self
.parser
, self
.label
= parser
, label
4829 self
._loaded
_paths
, self
.configs
= set(), []
4831 def init(self
, args
=None, filename
=None):
4832 assert not self
.__initialized
4833 self
.own_args
, self
.filename
= args
, filename
4834 return self
.load_configs()
4836 def load_configs(self
):
4839 location
= os
.path
.realpath(self
.filename
)
4840 directory
= os
.path
.dirname(location
)
4841 if location
in self
._loaded
_paths
:
4843 self
._loaded
_paths
.add(location
)
4845 self
.__initialized
= True
4846 opts
, _
= self
.parser
.parse_known_args(self
.own_args
)
4847 self
.parsed_args
= self
.own_args
4848 for location
in opts
.config_locations
or []:
4850 if location
in self
._loaded
_paths
:
4852 self
._loaded
_paths
.add(location
)
4853 self
.append_config(shlex
.split(read_stdin('options'), comments
=True), label
='stdin')
4855 location
= os
.path
.join(directory
, expand_path(location
))
4856 if os
.path
.isdir(location
):
4857 location
= os
.path
.join(location
, 'yt-dlp.conf')
4858 if not os
.path
.exists(location
):
4859 self
.parser
.error(f
'config location {location} does not exist')
4860 self
.append_config(self
.read_file(location
), location
)
4864 label
= join_nonempty(
4865 self
.label
, 'config', f
'"{self.filename}"' if self
.filename
else '',
4867 return join_nonempty(
4868 self
.own_args
is not None and f
'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
4869 *(f
'\n{c}'.replace('\n', '\n| ')[1:] for c
in self
.configs
),
4873 def read_file(filename
, default
=[]):
4875 optionf
= open(filename
, 'rb')
4877 return default
# silently skip if file is not present
4879 enc
, skip
= determine_file_encoding(optionf
.read(512))
4880 optionf
.seek(skip
, io
.SEEK_SET
)
4882 enc
= None # silently skip read errors
4884 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
4885 contents
= optionf
.read().decode(enc
or preferredencoding())
4886 res
= shlex
.split(contents
, comments
=True)
4887 except Exception as err
:
4888 raise ValueError(f
'Unable to parse "{filename}": {err}')
4894 def hide_login_info(opts
):
4895 PRIVATE_OPTS
= {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
4896 eqre
= re
.compile('^(?P<key>' + ('|'.join(re
.escape(po
) for po
in PRIVATE_OPTS
)) + ')=.+$')
4901 return m
.group('key') + '=PRIVATE'
4905 opts
= list(map(_scrub_eq
, opts
))
4906 for idx
, opt
in enumerate(opts
):
4907 if opt
in PRIVATE_OPTS
and idx
+ 1 < len(opts
):
4908 opts
[idx
+ 1] = 'PRIVATE'
4911 def append_config(self
, *args
, label
=None):
4912 config
= type(self
)(self
.parser
, label
)
4913 config
._loaded
_paths
= self
._loaded
_paths
4914 if config
.init(*args
):
4915 self
.configs
.append(config
)
4919 for config
in reversed(self
.configs
):
4920 yield from config
.all_args
4921 yield from self
.parsed_args
or []
4923 def parse_known_args(self
, **kwargs
):
4924 return self
.parser
.parse_known_args(self
.all_args
, **kwargs
)
4926 def parse_args(self
):
4927 return self
.parser
.parse_args(self
.all_args
)
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    merged = {}
    for headers in dicts:
        # dict.items is called unbound, exactly as the original did
        for name, value in dict.items(headers):
            merged[name.title()] = value
    return merged
4935 def cached_method(f
):
4936 """Cache a method"""
4937 signature
= inspect
.signature(f
)
4940 def wrapper(self
, *args
, **kwargs
):
4941 bound_args
= signature
.bind(self
, *args
, **kwargs
)
4942 bound_args
.apply_defaults()
4943 key
= tuple(bound_args
.arguments
.values())[1:]
4945 cache
= vars(self
).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {}
)
4946 if key
not in cache
:
4947 cache
[key
] = f(self
, *args
, **kwargs
)
4952 class classproperty
:
4953 """property access for class methods with optional caching"""
4954 def __new__(cls
, func
=None, *args
, **kwargs
):
4956 return functools
.partial(cls
, *args
, **kwargs
)
4957 return super().__new
__(cls
)
4959 def __init__(self
, func
, *, cache
=False):
4960 functools
.update_wrapper(self
, func
)
4962 self
._cache
= {} if cache
else None
4964 def __get__(self
, _
, cls
):
4965 if self
._cache
is None:
4966 return self
.func(cls
)
4967 elif cls
not in self
._cache
:
4968 self
._cache
[cls
] = self
.func(cls
)
4969 return self
._cache
[cls
]
4972 class function_with_repr
:
4973 def __init__(self
, func
, repr_
=None):
4974 functools
.update_wrapper(self
, func
)
4975 self
.func
, self
.__repr
= func
, repr_
4977 def __call__(self
, *args
, **kwargs
):
4978 return self
.func(*args
, **kwargs
)
4983 return f
'{self.func.__module__}.{self.func.__qualname__}'
4986 class Namespace(types
.SimpleNamespace
):
4987 """Immutable namespace"""
4990 return iter(self
.__dict
__.values())
4994 return self
.__dict
__.items()
4997 MEDIA_EXTENSIONS
= Namespace(
4998 common_video
=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
4999 video
=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5000 common_audio
=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5001 audio
=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
5002 thumbnails
=('jpg', 'png', 'webp'),
5003 storyboards
=('mhtml', ),
5004 subtitles
=('srt', 'vtt', 'ass', 'lrc'),
5005 manifests
=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5007 MEDIA_EXTENSIONS
.video
+= MEDIA_EXTENSIONS
.common_video
5008 MEDIA_EXTENSIONS
.audio
+= MEDIA_EXTENSIONS
.common_audio
5010 KNOWN_EXTENSIONS
= (*MEDIA_EXTENSIONS
.video
, *MEDIA_EXTENSIONS
.audio
, *MEDIA_EXTENSIONS
.manifests
)
5015 for retry in RetryManager(...):
5018 except SomeException as err:
5022 attempt
, _error
= 0, None
5024 def __init__(self
, _retries
, _error_callback
, **kwargs
):
5025 self
.retries
= _retries
or 0
5026 self
.error_callback
= functools
.partial(_error_callback
, **kwargs
)
5028 def _should_retry(self
):
5029 return self
._error
is not NO_DEFAULT
and self
.attempt
<= self
.retries
5033 if self
._error
is NO_DEFAULT
:
5038 def error(self
, value
):
5042 while self
._should
_retry
():
5043 self
.error
= NO_DEFAULT
5047 self
.error_callback(self
.error
, self
.attempt
, self
.retries
)
5050 def report_retry(e
, count
, retries
, *, sleep_func
, info
, warn
, error
=None, suffix
=None):
5051 """Utility function for reporting retries"""
5054 return error(f
'{e}. Giving up after {count - 1} retries') if count
> 1 else error(str(e
))
5059 elif isinstance(e
, ExtractorError
):
5060 e
= remove_end(str_or_none(e
.cause
) or e
.orig_msg
, '.')
5061 warn(f
'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5063 delay
= float_or_none(sleep_func(n
=count
- 1)) if callable(sleep_func
) else sleep_func
5065 info(f
'Sleeping {delay:.2f} seconds ...')
def make_archive_id(ie, video_id):
    """
    Build the archive entry "<lowercased ie key> <video id>"

    @param ie           Either the key as a string, or an object whose
                        ie_key() method returns it
    @param video_id     Identifier appended after the key
    """
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
5074 def truncate_string(s
, left
, right
=0):
5075 assert left
> 3 and right
>= 0
5076 if s
is None or len(s
) <= left
+ right
:
5078 return f
'{s[:left - 3]}...{s[-right:] if right else ""}'
5081 def orderedSet_from_options(options
, alias_dict
, *, use_regex
=False, start
=None):
5082 assert 'all' in alias_dict
, '"all" alias is required'
5083 requested
= list(start
or [])
5085 discard
= val
.startswith('-')
5089 if val
in alias_dict
:
5090 val
= alias_dict
[val
] if not discard
else [
5091 i
[1:] if i
.startswith('-') else f
'-{i}' for i
in alias_dict
[val
]]
5092 # NB: Do not allow regex in aliases for performance
5093 requested
= orderedSet_from_options(val
, alias_dict
, start
=requested
)
5096 current
= (filter(re
.compile(val
, re
.I
).fullmatch
, alias_dict
['all']) if use_regex
5097 else [val
] if val
in alias_dict
['all'] else None)
5099 raise ValueError(val
)
5102 for item
in current
:
5103 while item
in requested
:
5104 requested
.remove(item
)
5106 requested
.extend(current
)
5108 return orderedSet(requested
)
5113 regex
= r
' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
5115 default
= ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
5116 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
5117 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
5118 ytdl_default
= ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
5119 'height', 'width', 'proto', 'vext', 'abr', 'aext',
5120 'fps', 'fs_approx', 'source', 'id')
5123 'vcodec': {'type': 'ordered', 'regex': True,
5124 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
5125 'acodec': {'type': 'ordered', 'regex': True,
5126 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
5127 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
5128 'order': ['dv', '(hdr)?12', r
'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
5129 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
5130 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
5131 'vext': {'type': 'ordered', 'field': 'video_ext',
5132 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
5133 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
5134 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
5135 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
5136 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
5137 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}
,
5138 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
5139 'field': ('vcodec', 'acodec'),
5140 'function': lambda it
: int(any(v
!= 'none' for v
in it
))},
5141 'ie_pref': {'priority': True, 'type': 'extractor'}
,
5142 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}
,
5143 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}
,
5144 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}
,
5145 'quality': {'convert': 'float', 'default': -1}
,
5146 'filesize': {'convert': 'bytes'}
,
5147 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}
,
5148 'id': {'convert': 'string', 'field': 'format_id'}
,
5149 'height': {'convert': 'float_none'}
,
5150 'width': {'convert': 'float_none'}
,
5151 'fps': {'convert': 'float_none'}
,
5152 'channels': {'convert': 'float_none', 'field': 'audio_channels'}
,
5153 'tbr': {'convert': 'float_none'}
,
5154 'vbr': {'convert': 'float_none'}
,
5155 'abr': {'convert': 'float_none'}
,
5156 'asr': {'convert': 'float_none'}
,
5157 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}
,
5159 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}
,
5160 'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
5161 'function': lambda it
: next(filter(None, it
), None)},
5162 'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
5163 'function': lambda it
: next(filter(None, it
), None)},
5164 'ext': {'type': 'combined', 'field': ('vext', 'aext')}
,
5165 'res': {'type': 'multiple', 'field': ('height', 'width'),
5166 'function': lambda it
: (lambda l
: min(l
) if l
else 0)(tuple(filter(None, it
)))},
5168 # Actual field names
5169 'format_id': {'type': 'alias', 'field': 'id'}
,
5170 'preference': {'type': 'alias', 'field': 'ie_pref'}
,
5171 'language_preference': {'type': 'alias', 'field': 'lang'}
,
5172 'source_preference': {'type': 'alias', 'field': 'source'}
,
5173 'protocol': {'type': 'alias', 'field': 'proto'}
,
5174 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}
,
5175 'audio_channels': {'type': 'alias', 'field': 'channels'}
,
5178 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}
,
5179 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}
,
5180 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}
,
5181 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}
,
5182 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}
,
5183 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}
,
5184 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}
,
5185 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}
,
5186 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}
,
5187 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}
,
5188 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}
,
5189 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}
,
5190 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}
,
5191 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}
,
5192 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}
,
5193 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}
,
5194 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}
,
5195 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}
,
5196 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}
,
5197 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}
,
5200 def __init__(self
, ydl
, field_preference
):
5203 self
.evaluate_params(self
.ydl
.params
, field_preference
)
5204 if ydl
.params
.get('verbose'):
5205 self
.print_verbose_info(self
.ydl
.write_debug
)
5207 def _get_field_setting(self
, field
, key
):
5208 if field
not in self
.settings
:
5209 if key
in ('forced', 'priority'):
5211 self
.ydl
.deprecated_feature(f
'Using arbitrary fields ({field}) for format sorting is '
5212 'deprecated and may be removed in a future version')
5213 self
.settings
[field
] = {}
5214 propObj
= self
.settings
[field
]
5215 if key
not in propObj
:
5216 type = propObj
.get('type')
5218 default
= 'preference' if type == 'extractor' else (field
,) if type in ('combined', 'multiple') else field
5219 elif key
== 'convert':
5220 default
= 'order' if type == 'ordered' else 'float_string' if field
else 'ignore'
5222 default
= {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}
.get(key
, None)
5223 propObj
[key
] = default
5226 def _resolve_field_value(self
, field
, value
, convertNone
=False):
5231 value
= value
.lower()
5232 conversion
= self
._get
_field
_setting
(field
, 'convert')
5233 if conversion
== 'ignore':
5235 if conversion
== 'string':
5237 elif conversion
== 'float_none':
5238 return float_or_none(value
)
5239 elif conversion
== 'bytes':
5240 return parse_bytes(value
)
5241 elif conversion
== 'order':
5242 order_list
= (self
._use
_free
_order
and self
._get
_field
_setting
(field
, 'order_free')) or self
._get
_field
_setting
(field
, 'order')
5243 use_regex
= self
._get
_field
_setting
(field
, 'regex')
5244 list_length
= len(order_list
)
5245 empty_pos
= order_list
.index('') if '' in order_list
else list_length
+ 1
5246 if use_regex
and value
is not None:
5247 for i
, regex
in enumerate(order_list
):
5248 if regex
and re
.match(regex
, value
):
5249 return list_length
- i
5250 return list_length
- empty_pos
# not in list
5251 else: # not regex or value = None
5252 return list_length
- (order_list
.index(value
) if value
in order_list
else empty_pos
)
5254 if value
.isnumeric():
5257 self
.settings
[field
]['convert'] = 'string'
5260 def evaluate_params(self
, params
, sort_extractor
):
5261 self
._use
_free
_order
= params
.get('prefer_free_formats', False)
5262 self
._sort
_user
= params
.get('format_sort', [])
5263 self
._sort
_extractor
= sort_extractor
5265 def add_item(field
, reverse
, closest
, limit_text
):
5266 field
= field
.lower()
5267 if field
in self
._order
:
5269 self
._order
.append(field
)
5270 limit
= self
._resolve
_field
_value
(field
, limit_text
)
5273 'closest': False if limit
is None else closest
,
5274 'limit_text': limit_text
,
5276 if field
in self
.settings
:
5277 self
.settings
[field
].update(data
)
5279 self
.settings
[field
] = data
5282 tuple(field
for field
in self
.default
if self
._get
_field
_setting
(field
, 'forced'))
5283 + (tuple() if params
.get('format_sort_force', False)
5284 else tuple(field
for field
in self
.default
if self
._get
_field
_setting
(field
, 'priority')))
5285 + tuple(self
._sort
_user
) + tuple(sort_extractor
) + self
.default
)
5287 for item
in sort_list
:
5288 match
= re
.match(self
.regex
, item
)
5290 raise ExtractorError('Invalid format sort string "%s" given by extractor' % item
)
5291 field
= match
.group('field')
5294 if self
._get
_field
_setting
(field
, 'type') == 'alias':
5295 alias
, field
= field
, self
._get
_field
_setting
(field
, 'field')
5296 if self
._get
_field
_setting
(alias
, 'deprecated'):
5297 self
.ydl
.deprecated_feature(f
'Format sorting alias {alias} is deprecated and may '
5298 f
'be removed in a future version. Please use {field} instead')
5299 reverse
= match
.group('reverse') is not None
5300 closest
= match
.group('separator') == '~'
5301 limit_text
= match
.group('limit')
5303 has_limit
= limit_text
is not None
5304 has_multiple_fields
= self
._get
_field
_setting
(field
, 'type') == 'combined'
5305 has_multiple_limits
= has_limit
and has_multiple_fields
and not self
._get
_field
_setting
(field
, 'same_limit')
5307 fields
= self
._get
_field
_setting
(field
, 'field') if has_multiple_fields
else (field
,)
5308 limits
= limit_text
.split(':') if has_multiple_limits
else (limit_text
,) if has_limit
else tuple()
5309 limit_count
= len(limits
)
5310 for (i
, f
) in enumerate(fields
):
5311 add_item(f
, reverse
, closest
,
5312 limits
[i
] if i
< limit_count
5313 else limits
[0] if has_limit
and not has_multiple_limits
5316 def print_verbose_info(self
, write_debug
):
5318 write_debug('Sort order given by user: %s' % ', '.join(self
._sort
_user
))
5319 if self
._sort
_extractor
:
5320 write_debug('Sort order given by extractor: %s' % ', '.join(self
._sort
_extractor
))
5321 write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
5322 '+' if self
._get
_field
_setting
(field
, 'reverse') else '', field
,
5323 '%s%s(%s)' % ('~' if self
._get
_field
_setting
(field
, 'closest') else ':',
5324 self
._get
_field
_setting
(field
, 'limit_text'),
5325 self
._get
_field
_setting
(field
, 'limit'))
5326 if self
._get
_field
_setting
(field
, 'limit_text') is not None else '')
5327 for field
in self
._order
if self
._get
_field
_setting
(field
, 'visible')]))
5329 def _calculate_field_preference_from_value(self
, format
, field
, type, value
):
5330 reverse
= self
._get
_field
_setting
(field
, 'reverse')
5331 closest
= self
._get
_field
_setting
(field
, 'closest')
5332 limit
= self
._get
_field
_setting
(field
, 'limit')
5334 if type == 'extractor':
5335 maximum
= self
._get
_field
_setting
(field
, 'max')
5336 if value
is None or (maximum
is not None and value
>= maximum
):
5338 elif type == 'boolean':
5339 in_list
= self
._get
_field
_setting
(field
, 'in_list')
5340 not_in_list
= self
._get
_field
_setting
(field
, 'not_in_list')
5341 value
= 0 if ((in_list
is None or value
in in_list
) and (not_in_list
is None or value
not in not_in_list
)) else -1
5342 elif type == 'ordered':
5343 value
= self
._resolve
_field
_value
(field
, value
, True)
5345 # try to convert to number
5346 val_num
= float_or_none(value
, default
=self
._get
_field
_setting
(field
, 'default'))
5347 is_num
= self
._get
_field
_setting
(field
, 'convert') != 'string' and val_num
is not None
5351 return ((-10, 0) if value
is None
5352 else (1, value
, 0) if not is_num
# if a field has mixed strings and numbers, strings are sorted higher
5353 else (0, -abs(value
- limit
), value
- limit
if reverse
else limit
- value
) if closest
5354 else (0, value
, 0) if not reverse
and (limit
is None or value
<= limit
)
5355 else (0, -value
, 0) if limit
is None or (reverse
and value
== limit
) or value
> limit
5356 else (-1, value
, 0))
5358 def _calculate_field_preference(self
, format
, field
):
5359 type = self
._get
_field
_setting
(field
, 'type') # extractor, boolean, ordered, field, multiple
5360 get_value
= lambda f
: format
.get(self
._get
_field
_setting
(f
, 'field'))
5361 if type == 'multiple':
5362 type = 'field' # Only 'field' is allowed in multiple for now
5363 actual_fields
= self
._get
_field
_setting
(field
, 'field')
5365 value
= self
._get
_field
_setting
(field
, 'function')(get_value(f
) for f
in actual_fields
)
5367 value
= get_value(field
)
5368 return self
._calculate
_field
_preference
_from
_value
(format
, field
, type, value
)
5370 def calculate_preference(self
, format
):
5371 # Determine missing protocol
5372 if not format
.get('protocol'):
5373 format
['protocol'] = determine_protocol(format
)
5375 # Determine missing ext
5376 if not format
.get('ext') and 'url' in format
:
5377 format
['ext'] = determine_ext(format
['url'])
5378 if format
.get('vcodec') == 'none':
5379 format
['audio_ext'] = format
['ext'] if format
.get('acodec') != 'none' else 'none'
5380 format
['video_ext'] = 'none'
5382 format
['video_ext'] = format
['ext']
5383 format
['audio_ext'] = 'none'
5384 # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
5385 # format['preference'] = -1000
5387 if format
.get('preference') is None and format
.get('ext') == 'flv' and re
.match('[hx]265|he?vc?', format
.get('vcodec') or ''):
5388 # HEVC-over-FLV is out-of-spec by FLV's original spec
5389 # ref. https://trac.ffmpeg.org/ticket/6389
5390 # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
5391 format
['preference'] = -100
5393 # Determine missing bitrates
5394 if format
.get('vcodec') == 'none':
5396 if format
.get('acodec') == 'none':
5398 if not format
.get('vbr') and format
.get('vcodec') != 'none':
5399 format
['vbr'] = try_call(lambda: format
['tbr'] - format
['abr']) or None
5400 if not format
.get('abr') and format
.get('acodec') != 'none':
5401 format
['abr'] = try_call(lambda: format
['tbr'] - format
['vbr']) or None
5402 if not format
.get('tbr'):
5403 format
['tbr'] = try_call(lambda: format
['vbr'] + format
['abr']) or None
5405 return tuple(self
._calculate
_field
_preference
(format
, field
) for field
in self
._order
)
5410 def __init__(self
, ydl
=None):
5413 def debug(self
, message
):
5415 self
._ydl
.write_debug(message
)
5417 def info(self
, message
):
5419 self
._ydl
.to_screen(message
)
5421 def warning(self
, message
, *, once
=False):
5423 self
._ydl
.report_warning(message
, once
)
5425 def error(self
, message
, *, is_error
=True):
5427 self
._ydl
.report_error(message
, is_error
=is_error
)
5429 def stdout(self
, message
):
5431 self
._ydl
.to_stdout(message
)
5433 def stderr(self
, message
):
5435 self
._ydl
.to_stderr(message
)