yt_dlp / utils / _utils.py
1 import asyncio
2 import atexit
3 import base64
4 import binascii
5 import calendar
6 import codecs
7 import collections
8 import collections.abc
9 import contextlib
10 import datetime
11 import email.header
12 import email.utils
13 import errno
14 import gzip
15 import hashlib
16 import hmac
17 import html.entities
18 import html.parser
19 import http.client
20 import http.cookiejar
21 import inspect
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import mimetypes
28 import operator
29 import os
30 import platform
31 import random
32 import re
33 import shlex
34 import socket
35 import ssl
36 import struct
37 import subprocess
38 import sys
39 import tempfile
40 import time
41 import traceback
42 import types
43 import unicodedata
44 import urllib.error
45 import urllib.parse
46 import urllib.request
47 import xml.etree.ElementTree
48 import zlib
49
50 from . import traversal
51
52 from ..compat import functools # isort: split
53 from ..compat import (
54 compat_etree_fromstring,
55 compat_expanduser,
56 compat_HTMLParseError,
57 compat_os_name,
58 compat_shlex_quote,
59 )
60 from ..dependencies import brotli, certifi, websockets, xattr
61 from ..socks import ProxyType, sockssocket
62
63 # This is not clearly defined otherwise
64 compiled_regex_type = type(re.compile(''))
65
66
67 def random_user_agent():
68 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
69 _CHROME_VERSIONS = (
70 '90.0.4430.212',
71 '90.0.4430.24',
72 '90.0.4430.70',
73 '90.0.4430.72',
74 '90.0.4430.85',
75 '90.0.4430.93',
76 '91.0.4472.101',
77 '91.0.4472.106',
78 '91.0.4472.114',
79 '91.0.4472.124',
80 '91.0.4472.164',
81 '91.0.4472.19',
82 '91.0.4472.77',
83 '92.0.4515.107',
84 '92.0.4515.115',
85 '92.0.4515.131',
86 '92.0.4515.159',
87 '92.0.4515.43',
88 '93.0.4556.0',
89 '93.0.4577.15',
90 '93.0.4577.63',
91 '93.0.4577.82',
92 '94.0.4606.41',
93 '94.0.4606.54',
94 '94.0.4606.61',
95 '94.0.4606.71',
96 '94.0.4606.81',
97 '94.0.4606.85',
98 '95.0.4638.17',
99 '95.0.4638.50',
100 '95.0.4638.54',
101 '95.0.4638.69',
102 '95.0.4638.74',
103 '96.0.4664.18',
104 '96.0.4664.45',
105 '96.0.4664.55',
106 '96.0.4664.93',
107 '97.0.4692.20',
108 )
109 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
110
111
112 SUPPORTED_ENCODINGS = [
113 'gzip', 'deflate'
114 ]
115 if brotli:
116 SUPPORTED_ENCODINGS.append('br')
117
118 std_headers = {
119 'User-Agent': random_user_agent(),
120 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
121 'Accept-Language': 'en-us,en;q=0.5',
122 'Sec-Fetch-Mode': 'navigate',
123 }
124
125
126 USER_AGENTS = {
127 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
128 }
129
130
131 NO_DEFAULT = object()
132 IDENTITY = lambda x: x
133
134 ENGLISH_MONTH_NAMES = [
135 'January', 'February', 'March', 'April', 'May', 'June',
136 'July', 'August', 'September', 'October', 'November', 'December']
137
138 MONTH_NAMES = {
139 'en': ENGLISH_MONTH_NAMES,
140 'fr': [
141 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
142 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
143 # these follow the genitive grammatical case (dopełniacz)
144 # some websites might be using nominative, which will require another month list
145 # https://en.wikibooks.org/wiki/Polish/Noun_cases
146 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
147 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
148 }
149
150 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
151 TIMEZONE_NAMES = {
152 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
153 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
154 'EST': -5, 'EDT': -4, # Eastern
155 'CST': -6, 'CDT': -5, # Central
156 'MST': -7, 'MDT': -6, # Mountain
157 'PST': -8, 'PDT': -7 # Pacific
158 }
159
160 # needed for sanitizing filenames in restricted mode
161 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
162 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
163 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
164
165 DATE_FORMATS = (
166 '%d %B %Y',
167 '%d %b %Y',
168 '%B %d %Y',
169 '%B %dst %Y',
170 '%B %dnd %Y',
171 '%B %drd %Y',
172 '%B %dth %Y',
173 '%b %d %Y',
174 '%b %dst %Y',
175 '%b %dnd %Y',
176 '%b %drd %Y',
177 '%b %dth %Y',
178 '%b %dst %Y %I:%M',
179 '%b %dnd %Y %I:%M',
180 '%b %drd %Y %I:%M',
181 '%b %dth %Y %I:%M',
182 '%Y %m %d',
183 '%Y-%m-%d',
184 '%Y.%m.%d.',
185 '%Y/%m/%d',
186 '%Y/%m/%d %H:%M',
187 '%Y/%m/%d %H:%M:%S',
188 '%Y%m%d%H%M',
189 '%Y%m%d%H%M%S',
190 '%Y%m%d',
191 '%Y-%m-%d %H:%M',
192 '%Y-%m-%d %H:%M:%S',
193 '%Y-%m-%d %H:%M:%S.%f',
194 '%Y-%m-%d %H:%M:%S:%f',
195 '%d.%m.%Y %H:%M',
196 '%d.%m.%Y %H.%M',
197 '%Y-%m-%dT%H:%M:%SZ',
198 '%Y-%m-%dT%H:%M:%S.%fZ',
199 '%Y-%m-%dT%H:%M:%S.%f0Z',
200 '%Y-%m-%dT%H:%M:%S',
201 '%Y-%m-%dT%H:%M:%S.%f',
202 '%Y-%m-%dT%H:%M',
203 '%b %d %Y at %H:%M',
204 '%b %d %Y at %H:%M:%S',
205 '%B %d %Y at %H:%M',
206 '%B %d %Y at %H:%M:%S',
207 '%H:%M %d-%b-%Y',
208 )
209
210 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
211 DATE_FORMATS_DAY_FIRST.extend([
212 '%d-%m-%Y',
213 '%d.%m.%Y',
214 '%d.%m.%y',
215 '%d/%m/%Y',
216 '%d/%m/%y',
217 '%d/%m/%Y %H:%M:%S',
218 '%d-%m-%Y %H:%M',
219 ])
220
221 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
222 DATE_FORMATS_MONTH_FIRST.extend([
223 '%m-%d-%Y',
224 '%m.%d.%Y',
225 '%m/%d/%Y',
226 '%m/%d/%y',
227 '%m/%d/%Y %H:%M:%S',
228 ])
229
230 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
231 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
232
233 NUMBER_RE = r'\d+(?:\.\d+)?'
234
235
236 @functools.cache
237 def preferredencoding():
238 """Get preferred encoding.
239
240 Returns the best encoding scheme for the system, based on
241 locale.getpreferredencoding() and some further tweaks.
242 """
243 try:
244 pref = locale.getpreferredencoding()
245 'TEST'.encode(pref)
246 except Exception:
247 pref = 'UTF-8'
248
249 return pref
250
251
252 def write_json_file(obj, fn):
253 """ Encode obj as JSON and write it to fn, atomically if possible """
254
255 tf = tempfile.NamedTemporaryFile(
256 prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
257 suffix='.tmp', delete=False, mode='w', encoding='utf-8')
258
259 try:
260 with tf:
261 json.dump(obj, tf, ensure_ascii=False)
262 if sys.platform == 'win32':
263 # Need to remove existing file on Windows, else os.rename raises
264 # WindowsError or FileExistsError.
265 with contextlib.suppress(OSError):
266 os.unlink(fn)
267 with contextlib.suppress(OSError):
268 mask = os.umask(0)
269 os.umask(mask)
270 os.chmod(tf.name, 0o666 & ~mask)
271 os.rename(tf.name, fn)
272 except Exception:
273 with contextlib.suppress(OSError):
274 os.remove(tf.name)
275 raise
276
277
278 def find_xpath_attr(node, xpath, key, val=None):
279 """ Find the xpath xpath[@key=val] """
280 assert re.match(r'^[a-zA-Z_-]+$', key)
281 expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
282 return node.find(expr)
283
284 # On Python 2.6, the xml.etree.ElementTree.Element methods didn't support
285 # the namespace parameter, hence the helper below
286
287
288 def xpath_with_ns(path, ns_map):
289 components = [c.split(':') for c in path.split('/')]
290 replaced = []
291 for c in components:
292 if len(c) == 1:
293 replaced.append(c[0])
294 else:
295 ns, tag = c
296 replaced.append('{%s}%s' % (ns_map[ns], tag))
297 return '/'.join(replaced)
298
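# Illustrative usage (the MRSS namespace below is just an example):
#   >>> xpath_with_ns('media:thumbnail', {'media': 'http://search.yahoo.com/mrss/'})
#   '{http://search.yahoo.com/mrss/}thumbnail'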
299
300 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
301 def _find_xpath(xpath):
302 return node.find(xpath)
303
304 if isinstance(xpath, str):
305 n = _find_xpath(xpath)
306 else:
307 for xp in xpath:
308 n = _find_xpath(xp)
309 if n is not None:
310 break
311
312 if n is None:
313 if default is not NO_DEFAULT:
314 return default
315 elif fatal:
316 name = xpath if name is None else name
317 raise ExtractorError('Could not find XML element %s' % name)
318 else:
319 return None
320 return n
321
322
323 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
324 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
325 if n is None or n == default:
326 return n
327 if n.text is None:
328 if default is not NO_DEFAULT:
329 return default
330 elif fatal:
331 name = xpath if name is None else name
332 raise ExtractorError('Could not find XML element\'s text %s' % name)
333 else:
334 return None
335 return n.text
336
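# Illustrative usage with a hand-made document (values are hypothetical):
#   >>> import xml.etree.ElementTree as ET
#   >>> doc = ET.fromstring('<root><a><b>hi</b></a></root>')
#   >>> xpath_text(doc, './/b')
#   'hi'
#   >>> xpath_text(doc, './/c', default=None) is None
#   True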
337
338 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
339 n = find_xpath_attr(node, xpath, key)
340 if n is None:
341 if default is not NO_DEFAULT:
342 return default
343 elif fatal:
344 name = f'{xpath}[@{key}]' if name is None else name
345 raise ExtractorError('Could not find XML attribute %s' % name)
346 else:
347 return None
348 return n.attrib[key]
349
350
351 def get_element_by_id(id, html, **kwargs):
352 """Return the content of the tag with the specified ID in the passed HTML document"""
353 return get_element_by_attribute('id', id, html, **kwargs)
354
355
356 def get_element_html_by_id(id, html, **kwargs):
357 """Return the html of the tag with the specified ID in the passed HTML document"""
358 return get_element_html_by_attribute('id', id, html, **kwargs)
359
360
361 def get_element_by_class(class_name, html):
362 """Return the content of the first tag with the specified class in the passed HTML document"""
363 retval = get_elements_by_class(class_name, html)
364 return retval[0] if retval else None
365
366
367 def get_element_html_by_class(class_name, html):
368 """Return the html of the first tag with the specified class in the passed HTML document"""
369 retval = get_elements_html_by_class(class_name, html)
370 return retval[0] if retval else None
371
372
373 def get_element_by_attribute(attribute, value, html, **kwargs):
374 retval = get_elements_by_attribute(attribute, value, html, **kwargs)
375 return retval[0] if retval else None
376
377
378 def get_element_html_by_attribute(attribute, value, html, **kargs):
379 retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
380 return retval[0] if retval else None
381
382
383 def get_elements_by_class(class_name, html, **kargs):
384 """Return the content of all tags with the specified class in the passed HTML document as a list"""
385 return get_elements_by_attribute(
386 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
387 html, escape_value=False)
388
389
390 def get_elements_html_by_class(class_name, html):
391 """Return the html of all tags with the specified class in the passed HTML document as a list"""
392 return get_elements_html_by_attribute(
393 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
394 html, escape_value=False)
395
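# Illustrative usage of the class helpers above (sample markup is made up):
#   >>> snippet = '<span class="foo bar">text</span>'
#   >>> get_element_by_class('foo', snippet)
#   'text'
#   >>> get_element_html_by_class('foo', snippet)
#   '<span class="foo bar">text</span>'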
396
397 def get_elements_by_attribute(*args, **kwargs):
398 """Return the content of the tag with the specified attribute in the passed HTML document"""
399 return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
400
401
402 def get_elements_html_by_attribute(*args, **kwargs):
403 """Return the html of the tag with the specified attribute in the passed HTML document"""
404 return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
405
406
407 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
408 """
409 Yield the text (content) and the html (whole) of each tag with the specified
410 attribute in the passed HTML document
411 """
412 if not value:
413 return
414
415 quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
416
417 value = re.escape(value) if escape_value else value
418
419 partial_element_re = rf'''(?x)
420 <(?P<tag>{tag})
421 (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
422 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
423 '''
424
425 for m in re.finditer(partial_element_re, html):
426 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
427
428 yield (
429 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
430 whole
431 )
432
433
434 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
435 """
436 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
437 closing tag for the first opening tag it has encountered, and can be used
438 as a context manager
439 """
440
441 class HTMLBreakOnClosingTagException(Exception):
442 pass
443
444 def __init__(self):
445 self.tagstack = collections.deque()
446 html.parser.HTMLParser.__init__(self)
447
448 def __enter__(self):
449 return self
450
451 def __exit__(self, *_):
452 self.close()
453
454 def close(self):
455 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
456 # so data remains buffered; we no longer have any interest in it, thus
457 # override this method to discard it
458 pass
459
460 def handle_starttag(self, tag, _):
461 self.tagstack.append(tag)
462
463 def handle_endtag(self, tag):
464 if not self.tagstack:
465 raise compat_HTMLParseError('no tags in the stack')
466 while self.tagstack:
467 inner_tag = self.tagstack.pop()
468 if inner_tag == tag:
469 break
470 else:
471 raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
472 if not self.tagstack:
473 raise self.HTMLBreakOnClosingTagException()
474
475
476 # XXX: This should be far less strict
477 def get_element_text_and_html_by_tag(tag, html):
478 """
479 For the first element with the specified tag in the passed HTML document
480 return its' content (text) and the whole element (html)
481 """
482 def find_or_raise(haystack, needle, exc):
483 try:
484 return haystack.index(needle)
485 except ValueError:
486 raise exc
487 closing_tag = f'</{tag}>'
488 whole_start = find_or_raise(
489 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
490 content_start = find_or_raise(
491 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
492 content_start += whole_start + 1
493 with HTMLBreakOnClosingTagParser() as parser:
494 parser.feed(html[whole_start:content_start])
495 if not parser.tagstack or parser.tagstack[0] != tag:
496 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
497 offset = content_start
498 while offset < len(html):
499 next_closing_tag_start = find_or_raise(
500 html[offset:], closing_tag,
501 compat_HTMLParseError(f'closing {tag} tag not found'))
502 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
503 try:
504 parser.feed(html[offset:offset + next_closing_tag_end])
505 offset += next_closing_tag_end
506 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
507 return html[content_start:offset + next_closing_tag_start], \
508 html[whole_start:offset + next_closing_tag_end]
509 raise compat_HTMLParseError('unexpected end of html')
510
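# Illustrative usage; note that nested tags of the same name are balanced:
#   >>> get_element_text_and_html_by_tag('div', '<div class="a"><div>inner</div>outer</div>')
#   ('<div>inner</div>outer', '<div class="a"><div>inner</div>outer</div>')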
511
512 class HTMLAttributeParser(html.parser.HTMLParser):
513 """Trivial HTML parser to gather the attributes for a single element"""
514
515 def __init__(self):
516 self.attrs = {}
517 html.parser.HTMLParser.__init__(self)
518
519 def handle_starttag(self, tag, attrs):
520 self.attrs = dict(attrs)
521 raise compat_HTMLParseError('done')
522
523
524 class HTMLListAttrsParser(html.parser.HTMLParser):
525 """HTML parser to gather the attributes for the elements of a list"""
526
527 def __init__(self):
528 html.parser.HTMLParser.__init__(self)
529 self.items = []
530 self._level = 0
531
532 def handle_starttag(self, tag, attrs):
533 if tag == 'li' and self._level == 0:
534 self.items.append(dict(attrs))
535 self._level += 1
536
537 def handle_endtag(self, tag):
538 self._level -= 1
539
540
541 def extract_attributes(html_element):
542 """Given a string for an HTML element such as
543 <el
544 a="foo" B="bar" c="&98;az" d=boz
545 empty= noval entity="&amp;"
546 sq='"' dq="'"
547 >
548 Decode and return a dictionary of attributes.
549 {
550 'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
551 'empty': '', 'noval': None, 'entity': '&',
552 'sq': '"', 'dq': '\''
553 }.
554 """
555 parser = HTMLAttributeParser()
556 with contextlib.suppress(compat_HTMLParseError):
557 parser.feed(html_element)
558 parser.close()
559 return parser.attrs
560
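# Illustrative usage (sample element is made up):
#   >>> extract_attributes('<a href="https://example.com" data-id=42 hidden>')
#   {'href': 'https://example.com', 'data-id': '42', 'hidden': None}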
561
562 def parse_list(webpage):
563 """Given a string for an series of HTML <li> elements,
564 return a dictionary of their attributes"""
565 parser = HTMLListAttrsParser()
566 parser.feed(webpage)
567 parser.close()
568 return parser.items
569
570
571 def clean_html(html):
572 """Clean an HTML snippet into a readable string"""
573
574 if html is None: # Convenience for sanitizing descriptions etc.
575 return html
576
577 html = re.sub(r'\s+', ' ', html)
578 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
579 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
580 # Strip html tags
581 html = re.sub('<.*?>', '', html)
582 # Replace html entities
583 html = unescapeHTML(html)
584 return html.strip()
585
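# Illustrative usage: tags are stripped and paragraph breaks become newlines:
#   >>> clean_html('<p>Hello <b>world</b></p><p>Bye</p>')
#   'Hello world\nBye'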
586
587 class LenientJSONDecoder(json.JSONDecoder):
588 # TODO: Write tests
589 def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
590 self.transform_source, self.ignore_extra = transform_source, ignore_extra
591 self._close_attempts = 2 * close_objects
592 super().__init__(*args, **kwargs)
593
594 @staticmethod
595 def _close_object(err):
596 doc = err.doc[:err.pos]
597 # We need to add the comma first to get the correct error message
598 if err.msg.startswith('Expecting \',\''):
599 return doc + ','
600 elif not doc.endswith(','):
601 return
602
603 if err.msg.startswith('Expecting property name'):
604 return doc[:-1] + '}'
605 elif err.msg.startswith('Expecting value'):
606 return doc[:-1] + ']'
607
608 def decode(self, s):
609 if self.transform_source:
610 s = self.transform_source(s)
611 for attempt in range(self._close_attempts + 1):
612 try:
613 if self.ignore_extra:
614 return self.raw_decode(s.lstrip())[0]
615 return super().decode(s)
616 except json.JSONDecodeError as e:
617 if e.pos is None:
618 raise
619 elif attempt < self._close_attempts:
620 s = self._close_object(e)
621 if s is not None:
622 continue
623 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
624 assert False, 'Too many attempts to decode JSON'
625
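# Illustrative usage (inputs are made up): `ignore_extra` tolerates trailing
# garbage, and `close_objects` closes up to that many unterminated objects:
#   >>> LenientJSONDecoder(ignore_extra=True).decode('{"a": 1} trailing garbage')
#   {'a': 1}
#   >>> LenientJSONDecoder(close_objects=2).decode('{"a": [1, 2')
#   {'a': [1, 2]}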
626
627 def sanitize_open(filename, open_mode):
628 """Try to open the given filename, and slightly tweak it if this fails.
629
630 Attempts to open the given filename. If this fails, it tries to change
631 the filename slightly, step by step, until it's either able to open it
632 or it fails and raises a final exception, like the standard open()
633 function.
634
635 It returns the tuple (stream, definitive_file_name).
636 """
637 if filename == '-':
638 if sys.platform == 'win32':
639 import msvcrt
640
641 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
642 with contextlib.suppress(io.UnsupportedOperation):
643 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
644 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
645
646 for attempt in range(2):
647 try:
648 try:
649 if sys.platform == 'win32':
650 # FIXME: An exclusive lock also locks the file from being read.
651 # Since windows locks are mandatory, don't lock the file on windows (for now).
652 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
653 raise LockingUnsupportedError()
654 stream = locked_file(filename, open_mode, block=False).__enter__()
655 except OSError:
656 stream = open(filename, open_mode)
657 return stream, filename
658 except OSError as err:
659 if attempt or err.errno in (errno.EACCES,):
660 raise
661 old_filename, filename = filename, sanitize_path(filename)
662 if old_filename == filename:
663 raise
664
665
666 def timeconvert(timestr):
667 """Convert RFC 2822 defined time string into system timestamp"""
668 timestamp = None
669 timetuple = email.utils.parsedate_tz(timestr)
670 if timetuple is not None:
671 timestamp = email.utils.mktime_tz(timetuple)
672 return timestamp
673
674
675 def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
676 """Sanitizes a string so it could be used as part of a filename.
677 @param restricted Use a stricter subset of allowed characters
678 @param is_id Whether this is an ID that should be kept unchanged if possible.
679 If unset, yt-dlp's new sanitization rules are in effect
680 """
681 if s == '':
682 return ''
683
684 def replace_insane(char):
685 if restricted and char in ACCENT_CHARS:
686 return ACCENT_CHARS[char]
687 elif not restricted and char == '\n':
688 return '\0 '
689 elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
690 # Replace with their full-width unicode counterparts
691 return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
692 elif char == '?' or ord(char) < 32 or ord(char) == 127:
693 return ''
694 elif char == '"':
695 return '' if restricted else '\''
696 elif char == ':':
697 return '\0_\0-' if restricted else '\0 \0-'
698 elif char in '\\/|*<>':
699 return '\0_'
700 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
701 return '\0_'
702 return char
703
704 # Replace look-alike Unicode glyphs
705 if restricted and (is_id is NO_DEFAULT or not is_id):
706 s = unicodedata.normalize('NFKC', s)
707 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
708 result = ''.join(map(replace_insane, s))
709 if is_id is NO_DEFAULT:
710 result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
711 STRIP_RE = r'(?:\0.|[ _-])*'
712 result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
713 result = result.replace('\0', '') or '_'
714
715 if not is_id:
716 while '__' in result:
717 result = result.replace('__', '_')
718 result = result.strip('_')
719 # Common case of "Foreign band name - English song title"
720 if restricted and result.startswith('-_'):
721 result = result[2:]
722 if result.startswith('-'):
723 result = '_' + result[len('-'):]
724 result = result.lstrip('.')
725 if not result:
726 result = '_'
727 return result
728
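# Illustrative usage (filenames are made up): by default unsafe characters are
# replaced with full-width look-alikes, while restricted mode stays ASCII-only:
#   >>> sanitize_filename('AC/DC: Back in Black?')
#   'AC⧸DC： Back in Black？'
#   >>> sanitize_filename('AC/DC: Back in Black?', restricted=True)
#   'AC_DC_-_Back_in_Black'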
729
730 def sanitize_path(s, force=False):
731 """Sanitizes and normalizes path on Windows"""
732 if sys.platform == 'win32':
733 force = False
734 drive_or_unc, _ = os.path.splitdrive(s)
735 elif force:
736 drive_or_unc = ''
737 else:
738 return s
739
740 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
741 if drive_or_unc:
742 norm_path.pop(0)
743 sanitized_path = [
744 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
745 for path_part in norm_path]
746 if drive_or_unc:
747 sanitized_path.insert(0, drive_or_unc + os.path.sep)
748 elif force and s and s[0] == os.path.sep:
749 sanitized_path.insert(0, os.path.sep)
750 return os.path.join(*sanitized_path)
751
752
753 def sanitize_url(url, *, scheme='http'):
754 # Prepend scheme-less URLs with the `http:` scheme in order to mitigate
755 # unwanted failures due to a missing protocol
756 if url is None:
757 return
758 elif url.startswith('//'):
759 return f'{scheme}:{url}'
760 # Fix some common typos seen so far
761 COMMON_TYPOS = (
762 # https://github.com/ytdl-org/youtube-dl/issues/15649
763 (r'^httpss://', r'https://'),
764 # https://bx1.be/lives/direct-tv/
765 (r'^rmtp([es]?)://', r'rtmp\1://'),
766 )
767 for mistake, fixup in COMMON_TYPOS:
768 if re.match(mistake, url):
769 return re.sub(mistake, fixup, url)
770 return url
771
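# Illustrative usage:
#   >>> sanitize_url('//example.com/watch')
#   'http://example.com/watch'
#   >>> sanitize_url('httpss://example.com')
#   'https://example.com'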
772
773 def extract_basic_auth(url):
774 parts = urllib.parse.urlsplit(url)
775 if parts.username is None:
776 return url, None
777 url = urllib.parse.urlunsplit(parts._replace(netloc=(
778 parts.hostname if parts.port is None
779 else '%s:%d' % (parts.hostname, parts.port))))
780 auth_payload = base64.b64encode(
781 ('%s:%s' % (parts.username, parts.password or '')).encode())
782 return url, f'Basic {auth_payload.decode()}'
783
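# Illustrative usage (credentials are made up):
#   >>> extract_basic_auth('http://user:pass@example.com/path')
#   ('http://example.com/path', 'Basic dXNlcjpwYXNz')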
784
785 def sanitized_Request(url, *args, **kwargs):
786 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
787 if auth_header is not None:
788 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
789 headers['Authorization'] = auth_header
790 return urllib.request.Request(url, *args, **kwargs)
791
792
793 def expand_path(s):
794 """Expand shell variables and ~"""
795 return os.path.expandvars(compat_expanduser(s))
796
797
798 def orderedSet(iterable, *, lazy=False):
799 """Remove all duplicates from the input iterable"""
800 def _iter():
801 seen = [] # Do not use set since the items can be unhashable
802 for x in iterable:
803 if x not in seen:
804 seen.append(x)
805 yield x
806
807 return _iter() if lazy else list(_iter())
808
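# Illustrative usage: order of first occurrence is preserved:
#   >>> orderedSet([3, 1, 3, 2, 1])
#   [3, 1, 2]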
809
810 def _htmlentity_transform(entity_with_semicolon):
811 """Transforms an HTML entity to a character."""
812 entity = entity_with_semicolon[:-1]
813
814 # Known non-numeric HTML entity
815 if entity in html.entities.name2codepoint:
816 return chr(html.entities.name2codepoint[entity])
817
818 # TODO: HTML5 allows entities without a semicolon.
819 # E.g. '&Eacuteric' should be decoded as 'Éric'.
820 if entity_with_semicolon in html.entities.html5:
821 return html.entities.html5[entity_with_semicolon]
822
823 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
824 if mobj is not None:
825 numstr = mobj.group(1)
826 if numstr.startswith('x'):
827 base = 16
828 numstr = '0%s' % numstr
829 else:
830 base = 10
831 # See https://github.com/ytdl-org/youtube-dl/issues/7518
832 with contextlib.suppress(ValueError):
833 return chr(int(numstr, base))
834
835 # Unknown entity in name, return its literal representation
836 return '&%s;' % entity
837
838
839 def unescapeHTML(s):
840 if s is None:
841 return None
842 assert isinstance(s, str)
843
844 return re.sub(
845 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
846
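# Illustrative usage (sample string is made up):
#   >>> unescapeHTML('Bj&ouml;rk &amp; friends &#8211; live')
#   'Björk & friends – live'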
847
848 def escapeHTML(text):
849 return (
850 text
851 .replace('&', '&amp;')
852 .replace('<', '&lt;')
853 .replace('>', '&gt;')
854 .replace('"', '&quot;')
855 .replace("'", '&#39;')
856 )
857
858
859 def process_communicate_or_kill(p, *args, **kwargs):
860 deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
861 f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
862 return Popen.communicate_or_kill(p, *args, **kwargs)
863
864
865 class Popen(subprocess.Popen):
866 if sys.platform == 'win32':
867 _startupinfo = subprocess.STARTUPINFO()
868 _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
869 else:
870 _startupinfo = None
871
872 @staticmethod
873 def _fix_pyinstaller_ld_path(env):
874 """Restore LD_LIBRARY_PATH when using PyInstaller
875 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
876 https://github.com/yt-dlp/yt-dlp/issues/4573
877 """
878 if not hasattr(sys, '_MEIPASS'):
879 return
880
881 def _fix(key):
882 orig = env.get(f'{key}_ORIG')
883 if orig is None:
884 env.pop(key, None)
885 else:
886 env[key] = orig
887
888 _fix('LD_LIBRARY_PATH') # Linux
889 _fix('DYLD_LIBRARY_PATH') # macOS
890
891 def __init__(self, *args, env=None, text=False, **kwargs):
892 if env is None:
893 env = os.environ.copy()
894 self._fix_pyinstaller_ld_path(env)
895
896 self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
897 if text is True:
898 kwargs['universal_newlines'] = True # For 3.6 compatibility
899 kwargs.setdefault('encoding', 'utf-8')
900 kwargs.setdefault('errors', 'replace')
901 super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
902
903 def communicate_or_kill(self, *args, **kwargs):
904 try:
905 return self.communicate(*args, **kwargs)
906 except BaseException: # Including KeyboardInterrupt
907 self.kill(timeout=None)
908 raise
909
910 def kill(self, *, timeout=0):
911 super().kill()
912 if timeout != 0:
913 self.wait(timeout=timeout)
914
915 @classmethod
916 def run(cls, *args, timeout=None, **kwargs):
917 with cls(*args, **kwargs) as proc:
918 default = '' if proc.__text_mode else b''
919 stdout, stderr = proc.communicate_or_kill(timeout=timeout)
920 return stdout or default, stderr or default, proc.returncode
921
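# Illustrative usage sketch (assumes an `ffmpeg` executable is on PATH):
#   stdout, stderr, returncode = Popen.run(
#       ['ffmpeg', '-version'], text=True,
#       stdout=subprocess.PIPE, stderr=subprocess.PIPE)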
922
923 def encodeArgument(s):
924 # Legacy code that uses byte strings
925 # Uncomment the following line after fixing all post processors
926 # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
927 return s if isinstance(s, str) else s.decode('ascii')
928
929
930 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
931
932
933 def timetuple_from_msec(msec):
934 secs, msec = divmod(msec, 1000)
935 mins, secs = divmod(secs, 60)
936 hrs, mins = divmod(mins, 60)
937 return _timetuple(hrs, mins, secs, msec)
938
939
940 def formatSeconds(secs, delim=':', msec=False):
941 time = timetuple_from_msec(secs * 1000)
942 if time.hours:
943 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
944 elif time.minutes:
945 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
946 else:
947 ret = '%d' % time.seconds
948 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
949
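# Illustrative usage:
#   >>> timetuple_from_msec(123456)
#   Time(hours=0, minutes=2, seconds=3, milliseconds=456)
#   >>> formatSeconds(3723)
#   '1:02:03'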
950
951 def _ssl_load_windows_store_certs(ssl_context, storename):
952 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
953 try:
954 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
955 if encoding == 'x509_asn' and (
956 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
957 except PermissionError:
958 return
959 for cert in certs:
960 with contextlib.suppress(ssl.SSLError):
961 ssl_context.load_verify_locations(cadata=cert)
962
963
964 def make_HTTPS_handler(params, **kwargs):
965 opts_check_certificate = not params.get('nocheckcertificate')
966 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
967 context.check_hostname = opts_check_certificate
968 if params.get('legacyserverconnect'):
969 context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
970 # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
971 context.set_ciphers('DEFAULT')
972 elif (
973 sys.version_info < (3, 10)
974 and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
975 and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
976 ):
977 # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
978 # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
979 # in some situations [2][3].
980 # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
981 # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
982 # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
983 # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
984 # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
985 # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
986 # 4. https://peps.python.org/pep-0644/
987 # 5. https://peps.python.org/pep-0644/#libressl-support
988 # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
989 context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
990 context.minimum_version = ssl.TLSVersion.TLSv1_2
991
992 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
993 if opts_check_certificate:
994 if certifi and 'no-certifi' not in params.get('compat_opts', []):
995 context.load_verify_locations(cafile=certifi.where())
996 else:
997 try:
998 context.load_default_certs()
999 # Work around the issue in load_default_certs when there are bad certificates. See:
1000 # https://github.com/yt-dlp/yt-dlp/issues/1060,
1001 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
1002 except ssl.SSLError:
1003 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
1004 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
1005 for storename in ('CA', 'ROOT'):
1006 _ssl_load_windows_store_certs(context, storename)
1007 context.set_default_verify_paths()
1008
1009 client_certfile = params.get('client_certificate')
1010 if client_certfile:
1011 try:
1012 context.load_cert_chain(
1013 client_certfile, keyfile=params.get('client_certificate_key'),
1014 password=params.get('client_certificate_password'))
1015 except ssl.SSLError:
1016 raise YoutubeDLError('Unable to load client certificate')
1017
1018 # Some servers may reject requests if ALPN extension is not sent. See:
1019 # https://github.com/python/cpython/issues/85140
1020 # https://github.com/yt-dlp/yt-dlp/issues/3878
1021 with contextlib.suppress(NotImplementedError):
1022 context.set_alpn_protocols(['http/1.1'])
1023
1024 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
1025
1026
1027 def bug_reports_message(before=';'):
1028 from ..update import REPOSITORY
1029
1030 msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
1031 'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
1032
1033 before = before.rstrip()
1034 if not before or before.endswith(('.', '!', '?')):
1035 msg = msg[0].title() + msg[1:]
1036
1037 return (before + ' ' if before else '') + msg
1038
1039
1040 class YoutubeDLError(Exception):
1041 """Base exception for YoutubeDL errors."""
1042 msg = None
1043
1044 def __init__(self, msg=None):
1045 if msg is not None:
1046 self.msg = msg
1047 elif self.msg is None:
1048 self.msg = type(self).__name__
1049 super().__init__(self.msg)
1050
1051
1052 network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
1053 if hasattr(ssl, 'CertificateError'):
1054 network_exceptions.append(ssl.CertificateError)
1055 network_exceptions = tuple(network_exceptions)
1056
1057
1058 class ExtractorError(YoutubeDLError):
1059 """Error during info extraction."""
1060
1061 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
1062 """ tb, if given, is the original traceback (so that it can be printed out).
1063 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1064 """
1065 if sys.exc_info()[0] in network_exceptions:
1066 expected = True
1067
1068 self.orig_msg = str(msg)
1069 self.traceback = tb
1070 self.expected = expected
1071 self.cause = cause
1072 self.video_id = video_id
1073 self.ie = ie
1074 self.exc_info = sys.exc_info() # preserve original exception
1075 if isinstance(self.exc_info[1], ExtractorError):
1076 self.exc_info = self.exc_info[1].exc_info
1077 super().__init__(self.__msg)
1078
1079 @property
1080 def __msg(self):
1081 return ''.join((
1082 format_field(self.ie, None, '[%s] '),
1083 format_field(self.video_id, None, '%s: '),
1084 self.orig_msg,
1085 format_field(self.cause, None, ' (caused by %r)'),
1086 '' if self.expected else bug_reports_message()))
1087
1088 def format_traceback(self):
1089 return join_nonempty(
1090 self.traceback and ''.join(traceback.format_tb(self.traceback)),
1091 self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1092 delim='\n') or None
1093
1094 def __setattr__(self, name, value):
1095 super().__setattr__(name, value)
1096 if getattr(self, 'msg', None) and name not in ('msg', 'args'):
1097 self.msg = self.__msg or type(self).__name__
1098 self.args = (self.msg, ) # Cannot be property
1099
1100
1101 class UnsupportedError(ExtractorError):
1102 def __init__(self, url):
1103 super().__init__(
1104 'Unsupported URL: %s' % url, expected=True)
1105 self.url = url
1106
1107
1108 class RegexNotFoundError(ExtractorError):
1109 """Error when a regex didn't match"""
1110 pass
1111
1112
1113 class GeoRestrictedError(ExtractorError):
1114 """Geographic restriction Error exception.
1115
1116 This exception may be thrown when a video is not available from your
1117 geographic location due to geographic restrictions imposed by a website.
1118 """
1119
1120 def __init__(self, msg, countries=None, **kwargs):
1121 kwargs['expected'] = True
1122 super().__init__(msg, **kwargs)
1123 self.countries = countries
1124
1125
1126 class UserNotLive(ExtractorError):
1127 """Error when a channel/user is not live"""
1128
1129 def __init__(self, msg=None, **kwargs):
1130 kwargs['expected'] = True
1131 super().__init__(msg or 'The channel is not currently live', **kwargs)
1132
1133
1134 class DownloadError(YoutubeDLError):
1135 """Download Error exception.
1136
1137 This exception may be thrown by FileDownloader objects if they are not
1138 configured to continue on errors. They will contain the appropriate
1139 error message.
1140 """
1141
1142 def __init__(self, msg, exc_info=None):
1143 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1144 super().__init__(msg)
1145 self.exc_info = exc_info
1146
1147
1148 class EntryNotInPlaylist(YoutubeDLError):
1149 """Entry not in playlist exception.
1150
1151 This exception will be thrown by YoutubeDL when a requested entry
1152 is not found in the playlist info_dict
1153 """
1154 msg = 'Entry not found in info'
1155
1156
1157 class SameFileError(YoutubeDLError):
1158 """Same File exception.
1159
1160 This exception will be thrown by FileDownloader objects if they detect
1161 multiple files would have to be downloaded to the same file on disk.
1162 """
1163 msg = 'Fixed output name but more than one file to download'
1164
1165 def __init__(self, filename=None):
1166 if filename is not None:
1167 self.msg += f': {filename}'
1168 super().__init__(self.msg)
1169
1170
1171 class PostProcessingError(YoutubeDLError):
1172 """Post Processing exception.
1173
1174 This exception may be raised by PostProcessor's .run() method to
1175 indicate an error in the postprocessing task.
1176 """
1177
1178
1179 class DownloadCancelled(YoutubeDLError):
1180 """ Exception raised when the download queue should be interrupted """
1181 msg = 'The download was cancelled'
1182
1183
1184 class ExistingVideoReached(DownloadCancelled):
1185 """ --break-on-existing triggered """
1186 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1187
1188
1189 class RejectedVideoReached(DownloadCancelled):
1190 """ --break-match-filter triggered """
1191 msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
1192
1193
1194 class MaxDownloadsReached(DownloadCancelled):
1195 """ --max-downloads limit has been reached. """
1196 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1197
1198
1199 class ReExtractInfo(YoutubeDLError):
1200 """ Video info needs to be re-extracted. """
1201
1202 def __init__(self, msg, expected=False):
1203 super().__init__(msg)
1204 self.expected = expected
1205
1206
1207 class ThrottledDownload(ReExtractInfo):
1208 """ Download speed below --throttled-rate. """
1209 msg = 'The download speed is below throttle limit'
1210
1211 def __init__(self):
1212 super().__init__(self.msg, expected=False)
1213
1214
1215 class UnavailableVideoError(YoutubeDLError):
1216 """Unavailable Format exception.
1217
1218 This exception will be thrown when a video is requested
1219 in a format that is not available for that video.
1220 """
1221 msg = 'Unable to download video'
1222
1223 def __init__(self, err=None):
1224 if err is not None:
1225 self.msg += f': {err}'
1226 super().__init__(self.msg)
1227
1228
1229 class ContentTooShortError(YoutubeDLError):
1230 """Content Too Short exception.
1231
1232 This exception may be raised by FileDownloader objects when a file they
1233 download is too small for what the server announced first, indicating
1234 the connection was probably interrupted.
1235 """
1236
1237 def __init__(self, downloaded, expected):
1238 super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1239 # Both in bytes
1240 self.downloaded = downloaded
1241 self.expected = expected
1242
1243
1244 class XAttrMetadataError(YoutubeDLError):
1245 def __init__(self, code=None, msg='Unknown error'):
1246 super().__init__(msg)
1247 self.code = code
1248 self.msg = msg
1249
1250 # Parsing code and msg
1251 if (self.code in (errno.ENOSPC, errno.EDQUOT)
1252 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
1253 self.reason = 'NO_SPACE'
1254 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
1255 self.reason = 'VALUE_TOO_LONG'
1256 else:
1257 self.reason = 'NOT_SUPPORTED'
1258
1259
1260 class XAttrUnavailableError(YoutubeDLError):
1261 pass
1262
1263
1264 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1265 hc = http_class(*args, **kwargs)
1266 source_address = ydl_handler._params.get('source_address')
1267
1268 if source_address is not None:
1269 # This works around socket's _create_connection(), which tries all
1270 # address data from getaddrinfo(), including IPv6. This filters the result from
1271 # getaddrinfo() based on the source_address value.
1272 # This is based on the cpython socket.create_connection() function.
1273 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1274 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1275 host, port = address
1276 err = None
1277 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1278 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1279 ip_addrs = [addr for addr in addrs if addr[0] == af]
1280 if addrs and not ip_addrs:
1281 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1282 raise OSError(
1283 "No remote IP%s addresses available for connect, can't use '%s' as source address"
1284 % (ip_version, source_address[0]))
1285 for res in ip_addrs:
1286 af, socktype, proto, canonname, sa = res
1287 sock = None
1288 try:
1289 sock = socket.socket(af, socktype, proto)
1290 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1291 sock.settimeout(timeout)
1292 sock.bind(source_address)
1293 sock.connect(sa)
1294 err = None # Explicitly break reference cycle
1295 return sock
1296 except OSError as _:
1297 err = _
1298 if sock is not None:
1299 sock.close()
1300 if err is not None:
1301 raise err
1302 else:
1303 raise OSError('getaddrinfo returns an empty list')
1304 if hasattr(hc, '_create_connection'):
1305 hc._create_connection = _create_connection
1306 hc.source_address = (source_address, 0)
1307
1308 return hc
1309
1310
1311 class YoutubeDLHandler(urllib.request.HTTPHandler):
1312 """Handler for HTTP requests and responses.
1313
1314 This class, when installed with an OpenerDirector, automatically adds
1315 the standard headers to every HTTP request and handles gzipped, deflated and
1316 brotli responses from web servers.
1317
1318 Part of this code was copied from:
1319
1320 http://techknack.net/python-urllib2-handlers/
1321
1322 Andrew Rowls, the author of that code, agreed to release it to the
1323 public domain.
1324 """
1325
1326 def __init__(self, params, *args, **kwargs):
1327 urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
1328 self._params = params
1329
1330 def http_open(self, req):
1331 conn_class = http.client.HTTPConnection
1332
1333 socks_proxy = req.headers.get('Ytdl-socks-proxy')
1334 if socks_proxy:
1335 conn_class = make_socks_conn_class(conn_class, socks_proxy)
1336 del req.headers['Ytdl-socks-proxy']
1337
1338 return self.do_open(functools.partial(
1339 _create_http_connection, self, conn_class, False),
1340 req)
1341
1342 @staticmethod
1343 def deflate(data):
1344 if not data:
1345 return data
1346 try:
1347 return zlib.decompress(data, -zlib.MAX_WBITS)
1348 except zlib.error:
1349 return zlib.decompress(data)
1350
1351 @staticmethod
1352 def brotli(data):
1353 if not data:
1354 return data
1355 return brotli.decompress(data)
1356
1357 def http_request(self, req):
1358 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
1359 # always respected by websites: some tend to give out URLs with non-percent-encoded
1360 # non-ASCII characters (see telemb.py, ard.py [#3412]).
1361 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991),
1362 # so to work around the aforementioned issue we replace the request's original URL with a
1363 # percent-encoded one
1364 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1365 # the code of this workaround has been moved here from YoutubeDL.urlopen()
1366 url = req.get_full_url()
1367 url_escaped = escape_url(url)
1368
1369 # Substitute URL if any change after escaping
1370 if url != url_escaped:
1371 req = update_Request(req, url=url_escaped)
1372
1373 for h, v in self._params.get('http_headers', std_headers).items():
1374 # capitalize() is needed because of Python bug 2275: http://bugs.python.org/issue2275
1375 # (urllib capitalizes the dict keys because of this bug)
1376 if h.capitalize() not in req.headers:
1377 req.add_header(h, v)
1378
1379 if 'Youtubedl-no-compression' in req.headers: # deprecated
1380 req.headers.pop('Youtubedl-no-compression', None)
1381 req.add_header('Accept-encoding', 'identity')
1382
1383 if 'Accept-encoding' not in req.headers:
1384 req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
1385
1386 return super().do_request_(req)
1387
1388 def http_response(self, req, resp):
1389 old_resp = resp
1390 # gzip
1391 if resp.headers.get('Content-encoding', '') == 'gzip':
1392 content = resp.read()
1393 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
1394 try:
1395 uncompressed = io.BytesIO(gz.read())
1396 except OSError as original_ioerror:
1397 # There may be junk at the end of the file
1398 # See http://stackoverflow.com/q/4928560/35070 for details
1399 for i in range(1, 1024):
1400 try:
1401 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
1402 uncompressed = io.BytesIO(gz.read())
1403 except OSError:
1404 continue
1405 break
1406 else:
1407 raise original_ioerror
1408 resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
1409 resp.msg = old_resp.msg
1410 # deflate
1411 if resp.headers.get('Content-encoding', '') == 'deflate':
1412 gz = io.BytesIO(self.deflate(resp.read()))
1413 resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
1414 resp.msg = old_resp.msg
1415 # brotli
1416 if resp.headers.get('Content-encoding', '') == 'br':
1417 resp = urllib.request.addinfourl(
1418 io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
1419 resp.msg = old_resp.msg
1420 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1421 # https://github.com/ytdl-org/youtube-dl/issues/6457).
1422 if 300 <= resp.code < 400:
1423 location = resp.headers.get('Location')
1424 if location:
1425 # Per RFC 2616, the default charset is iso-8859-1, which Python 3 respects
1426 location = location.encode('iso-8859-1').decode()
1427 location_escaped = escape_url(location)
1428 if location != location_escaped:
1429 del resp.headers['Location']
1430 resp.headers['Location'] = location_escaped
1431 return resp
1432
1433 https_request = http_request
1434 https_response = http_response
1435
1436
1437 def make_socks_conn_class(base_class, socks_proxy):
1438 assert issubclass(base_class, (
1439 http.client.HTTPConnection, http.client.HTTPSConnection))
1440
1441 url_components = urllib.parse.urlparse(socks_proxy)
1442 if url_components.scheme.lower() == 'socks5':
1443 socks_type = ProxyType.SOCKS5
1444 elif url_components.scheme.lower() in ('socks', 'socks4'):
1445 socks_type = ProxyType.SOCKS4
1446 elif url_components.scheme.lower() == 'socks4a':
1447 socks_type = ProxyType.SOCKS4A
1448
1449 def unquote_if_non_empty(s):
1450 if not s:
1451 return s
1452 return urllib.parse.unquote_plus(s)
1453
1454 proxy_args = (
1455 socks_type,
1456 url_components.hostname, url_components.port or 1080,
1457 True, # Remote DNS
1458 unquote_if_non_empty(url_components.username),
1459 unquote_if_non_empty(url_components.password),
1460 )
1461
1462 class SocksConnection(base_class):
1463 def connect(self):
1464 self.sock = sockssocket()
1465 self.sock.setproxy(*proxy_args)
1466 if isinstance(self.timeout, (int, float)):
1467 self.sock.settimeout(self.timeout)
1468 self.sock.connect((self.host, self.port))
1469
1470 if isinstance(self, http.client.HTTPSConnection):
1471 if hasattr(self, '_context'): # Python > 2.6
1472 self.sock = self._context.wrap_socket(
1473 self.sock, server_hostname=self.host)
1474 else:
1475 self.sock = ssl.wrap_socket(self.sock)
1476
1477 return SocksConnection
1478
1479
1480 class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
1481 def __init__(self, params, https_conn_class=None, *args, **kwargs):
1482 urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
1483 self._https_conn_class = https_conn_class or http.client.HTTPSConnection
1484 self._params = params
1485
1486 def https_open(self, req):
1487 kwargs = {}
1488 conn_class = self._https_conn_class
1489
1490 if hasattr(self, '_context'): # python > 2.6
1491 kwargs['context'] = self._context
1492 if hasattr(self, '_check_hostname'): # python 3.x
1493 kwargs['check_hostname'] = self._check_hostname
1494
1495 socks_proxy = req.headers.get('Ytdl-socks-proxy')
1496 if socks_proxy:
1497 conn_class = make_socks_conn_class(conn_class, socks_proxy)
1498 del req.headers['Ytdl-socks-proxy']
1499
1500 try:
1501 return self.do_open(
1502 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
1503 except urllib.error.URLError as e:
1504 if (isinstance(e.reason, ssl.SSLError)
1505 and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1506 raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
1507 raise
1508
1509
1510 def is_path_like(f):
1511 return isinstance(f, (str, bytes, os.PathLike))
1512
1513
1514 class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1515 """
1516 See [1] for cookie file format.
1517
1518 1. https://curl.haxx.se/docs/http-cookies.html
1519 """
1520 _HTTPONLY_PREFIX = '#HttpOnly_'
1521 _ENTRY_LEN = 7
1522 _HEADER = '''# Netscape HTTP Cookie File
1523 # This file is generated by yt-dlp. Do not edit.
1524
1525 '''
1526 _CookieFileEntry = collections.namedtuple(
1527 'CookieFileEntry',
1528 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1529
1530 def __init__(self, filename=None, *args, **kwargs):
1531 super().__init__(None, *args, **kwargs)
1532 if is_path_like(filename):
1533 filename = os.fspath(filename)
1534 self.filename = filename
1535
1536 @staticmethod
1537 def _true_or_false(cndn):
1538 return 'TRUE' if cndn else 'FALSE'
1539
1540 @contextlib.contextmanager
1541 def open(self, file, *, write=False):
1542 if is_path_like(file):
1543 with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1544 yield f
1545 else:
1546 if write:
1547 file.truncate(0)
1548 yield file
1549
1550 def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1551 now = time.time()
1552 for cookie in self:
1553 if (not ignore_discard and cookie.discard
1554 or not ignore_expires and cookie.is_expired(now)):
1555 continue
1556 name, value = cookie.name, cookie.value
1557 if value is None:
1558 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1559 # with no name, whereas http.cookiejar regards it as a
1560 # cookie with no value.
1561 name, value = '', name
1562 f.write('%s\n' % '\t'.join((
1563 cookie.domain,
1564 self._true_or_false(cookie.domain.startswith('.')),
1565 cookie.path,
1566 self._true_or_false(cookie.secure),
1567 str_or_none(cookie.expires, default=''),
1568 name, value
1569 )))
1570
1571 def save(self, filename=None, *args, **kwargs):
1572 """
1573 Save cookies to a file.
1574 Code is taken from CPython 3.6
1575 https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1576
1577 if filename is None:
1578 if self.filename is not None:
1579 filename = self.filename
1580 else:
1581 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1582
1583 # Store session cookies with `expires` set to 0 instead of an empty string
1584 for cookie in self:
1585 if cookie.expires is None:
1586 cookie.expires = 0
1587
1588 with self.open(filename, write=True) as f:
1589 f.write(self._HEADER)
1590 self._really_save(f, *args, **kwargs)
1591
1592 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1593 """Load cookies from a file."""
1594 if filename is None:
1595 if self.filename is not None:
1596 filename = self.filename
1597 else:
1598 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1599
1600 def prepare_line(line):
1601 if line.startswith(self._HTTPONLY_PREFIX):
1602 line = line[len(self._HTTPONLY_PREFIX):]
1603 # comments and empty lines are fine
1604 if line.startswith('#') or not line.strip():
1605 return line
1606 cookie_list = line.split('\t')
1607 if len(cookie_list) != self._ENTRY_LEN:
1608 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1609 cookie = self._CookieFileEntry(*cookie_list)
1610 if cookie.expires_at and not cookie.expires_at.isdigit():
1611 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1612 return line
1613
1614 cf = io.StringIO()
1615 with self.open(filename) as f:
1616 for line in f:
1617 try:
1618 cf.write(prepare_line(line))
1619 except http.cookiejar.LoadError as e:
1620 if f'{line.strip()} '[0] in '[{"':
1621 raise http.cookiejar.LoadError(
1622 'Cookies file must be Netscape formatted, not JSON. See '
1623 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1624 write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1625 continue
1626 cf.seek(0)
1627 self._really_load(cf, filename, ignore_discard, ignore_expires)
1628 # Session cookies are denoted by either the `expires` field set to
1629 # an empty string or 0. MozillaCookieJar only recognizes the former
1630 # (see [1]), so we need to force the latter to be recognized as session
1631 # cookies on our own.
1632 # Session cookies may be important for cookie-based authentication:
1633 # usually, when a user does not check the 'Remember me' box while
1634 # logging in on a site, some important cookies are stored as session
1635 # cookies, and failing to recognize them will result in a failed login.
1636 # 1. https://bugs.python.org/issue17164
1637 for cookie in self:
1638 # Treat `expires=0` cookies as session cookies
1639 if cookie.expires == 0:
1640 cookie.expires = None
1641 cookie.discard = True
1642
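# Illustrative usage sketch (the cookies file path is hypothetical):
#   jar = YoutubeDLCookieJar('cookies.txt')
#   jar.load(ignore_discard=True, ignore_expires=True)
#   ...
#   jar.save()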
1643
1644 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
1645 def __init__(self, cookiejar=None):
1646 urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
1647
1648 def http_response(self, request, response):
1649 return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
1650
1651 https_request = urllib.request.HTTPCookieProcessor.http_request
1652 https_response = http_response
1653
1654
1655 class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
1656 """YoutubeDL redirect handler
1657
1658 The code is based on HTTPRedirectHandler implementation from CPython [1].
1659
1660 This redirect handler solves two issues:
1661 - ensures redirect URL is always unicode under python 2
1662 - introduces support for experimental HTTP response status code
1663 308 Permanent Redirect [2] used by some sites [3]
1664
1665 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1666 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1667 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1668 """
1669
1670 http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
1671
1672 def redirect_request(self, req, fp, code, msg, headers, newurl):
1673 """Return a Request or None in response to a redirect.
1674
1675 This is called by the http_error_30x methods when a
1676 redirection response is received. If a redirection should
1677 take place, return a new Request to allow http_error_30x to
1678 perform the redirect. Otherwise, raise HTTPError if no-one
1679 else should try to handle this url. Return None if you can't
1680 but another Handler might.
1681 """
1682 m = req.get_method()
1683 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1684 or code in (301, 302, 303) and m == "POST")):
1685 raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
1686 # Strictly (according to RFC 2616), 301 or 302 in response to
1687 # a POST MUST NOT cause a redirection without confirmation
1688 # from the user (of urllib.request, in this case). In practice,
1689 # essentially all clients do redirect in this case, so we do
1690 # the same.
1691
1692 # Be conciliant with URIs containing a space. This is mainly
1693 # redundant with the more complete encoding done in http_error_302(),
1694 # but it is kept for compatibility with other callers.
1695 newurl = newurl.replace(' ', '%20')
1696
1697 CONTENT_HEADERS = ("content-length", "content-type")
1698 # Strip the Content-* headers, since they may not apply to the redirected request
1699 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1700
1701 # A 303 must either use GET or HEAD for subsequent request
1702 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1703 if code == 303 and m != 'HEAD':
1704 m = 'GET'
1705 # 301 and 302 redirects are commonly turned into a GET from a POST
1706 # for subsequent requests by browsers, so we'll do the same.
1707 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1708 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1709 if code in (301, 302) and m == 'POST':
1710 m = 'GET'
1711
1712 return urllib.request.Request(
1713 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1714 unverifiable=True, method=m)
1715
1716
1717 def extract_timezone(date_str):
1718 m = re.search(
1719 r'''(?x)
1720 ^.{8,}? # >=8 char non-TZ prefix, if present
1721 (?P<tz>Z| # just the UTC Z, or
1722 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1723 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1724 [ ]? # optional space
1725 (?P<sign>\+|-) # +/-
1726 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1727 $)
1728 ''', date_str)
1729 if not m:
1730 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1731 timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
1732 if timezone is not None:
1733 date_str = date_str[:-len(m.group('tz'))]
1734 timezone = datetime.timedelta(hours=timezone or 0)
1735 else:
1736 date_str = date_str[:-len(m.group('tz'))]
1737 if not m.group('sign'):
1738 timezone = datetime.timedelta()
1739 else:
1740 sign = 1 if m.group('sign') == '+' else -1
1741 timezone = datetime.timedelta(
1742 hours=sign * int(m.group('hours')),
1743 minutes=sign * int(m.group('minutes')))
1744 return timezone, date_str
1745
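# Illustrative example (not part of the original module): extract_timezone()
# returns the offset as a datetime.timedelta together with the date string
# with the timezone part removed, e.g.:
# >>> extract_timezone('2023-05-01T12:00:00+02:00')
# (datetime.timedelta(seconds=7200), '2023-05-01T12:00:00')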
1746
1747 def parse_iso8601(date_str, delimiter='T', timezone=None):
1748 """ Return a UNIX timestamp from the given date """
1749
1750 if date_str is None:
1751 return None
1752
1753 date_str = re.sub(r'\.[0-9]+', '', date_str)
1754
1755 if timezone is None:
1756 timezone, date_str = extract_timezone(date_str)
1757
1758 with contextlib.suppress(ValueError):
1759 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1760 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1761 return calendar.timegm(dt.timetuple())
1762
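# Illustrative example (not part of the original module): fractional seconds
# are stripped and the timezone offset is applied before converting to a
# UNIX timestamp, e.g.:
# >>> parse_iso8601('2023-05-01T12:00:00+02:00')
# 1682935200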
1763
1764 def date_formats(day_first=True):
1765 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1766
1767
1768 def unified_strdate(date_str, day_first=True):
1769 """Return a string with the date in the format YYYYMMDD"""
1770
1771 if date_str is None:
1772 return None
1773 upload_date = None
1774 # Replace commas
1775 date_str = date_str.replace(',', ' ')
1776 # Remove AM/PM + timezone
1777 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1778 _, date_str = extract_timezone(date_str)
1779
1780 for expression in date_formats(day_first):
1781 with contextlib.suppress(ValueError):
1782 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1783 if upload_date is None:
1784 timetuple = email.utils.parsedate_tz(date_str)
1785 if timetuple:
1786 with contextlib.suppress(ValueError):
1787 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1788 if upload_date is not None:
1789 return str(upload_date)
1790
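# Illustrative example (not part of the original module):
# >>> unified_strdate('December 21, 2010')
# '20101221'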
1791
1792 def unified_timestamp(date_str, day_first=True):
1793 if date_str is None:
1794 return None
1795
1796 date_str = re.sub(r'\s+', ' ', re.sub(
1797 r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
1798
1799 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1800 timezone, date_str = extract_timezone(date_str)
1801
1802 # Remove AM/PM + timezone
1803 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1804
1805 # Remove unrecognized timezones from ISO 8601 alike timestamps
1806 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1807 if m:
1808 date_str = date_str[:-len(m.group('tz'))]
1809
1810 # Python only supports microseconds, so remove nanoseconds
1811 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1812 if m:
1813 date_str = m.group(1)
1814
1815 for expression in date_formats(day_first):
1816 with contextlib.suppress(ValueError):
1817 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1818 return calendar.timegm(dt.timetuple())
1819
1820 timetuple = email.utils.parsedate_tz(date_str)
1821 if timetuple:
1822 return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
1823
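# Illustrative example (not part of the original module):
# >>> unified_timestamp('December 21, 2010')
# 1292889600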
1824
1825 def determine_ext(url, default_ext='unknown_video'):
1826 if url is None or '.' not in url:
1827 return default_ext
1828 guess = url.partition('?')[0].rpartition('.')[2]
1829 if re.match(r'^[A-Za-z0-9]+$', guess):
1830 return guess
1831 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1832 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1833 return guess.rstrip('/')
1834 else:
1835 return default_ext
1836
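# Illustrative examples (not part of the original module):
# >>> determine_ext('http://example.com/video.mp4')
# 'mp4'
# >>> determine_ext('http://example.com/foo/bar.mp4/?download')
# 'mp4'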
1837
1838 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1839 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1840
1841
1842 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1843 R"""
1844 Return a datetime object from a string.
1845 Supported format:
1846 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1847
1848 @param format strftime format of DATE
1849 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1850 auto: round to the unit provided in date_str (if applicable).
1851 """
1852 auto_precision = False
1853 if precision == 'auto':
1854 auto_precision = True
1855 precision = 'microsecond'
1856 today = datetime_round(datetime.datetime.utcnow(), precision)
1857 if date_str in ('now', 'today'):
1858 return today
1859 if date_str == 'yesterday':
1860 return today - datetime.timedelta(days=1)
1861 match = re.match(
1862 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1863 date_str)
1864 if match is not None:
1865 start_time = datetime_from_str(match.group('start'), precision, format)
1866 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1867 unit = match.group('unit')
1868 if unit == 'month' or unit == 'year':
1869 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1870 unit = 'day'
1871 else:
1872 if unit == 'week':
1873 unit = 'day'
1874 time *= 7
1875 delta = datetime.timedelta(**{unit + 's': time})
1876 new_date = start_time + delta
1877 if auto_precision:
1878 return datetime_round(new_date, unit)
1879 return new_date
1880
1881 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1882
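# Illustrative example (not part of the original module; 'now'/'today'
# results naturally depend on the current time):
# >>> datetime_from_str('20230501+2weeks')
# datetime.datetime(2023, 5, 15, 0, 0)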
1883
1884 def date_from_str(date_str, format='%Y%m%d', strict=False):
1885 R"""
1886 Return a date object from a string using datetime_from_str
1887
1888 @param strict Restrict allowed patterns to "YYYYMMDD" and
1889 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1890 """
1891 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1892 raise ValueError(f'Invalid date format "{date_str}"')
1893 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1894
1895
1896 def datetime_add_months(dt, months):
1897 """Increment/Decrement a datetime object by months."""
1898 month = dt.month + months - 1
1899 year = dt.year + month // 12
1900 month = month % 12 + 1
1901 day = min(dt.day, calendar.monthrange(year, month)[1])
1902 return dt.replace(year, month, day)
1903
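# Illustrative example (not part of the original module): the day is clamped
# to the last day of the target month:
# >>> datetime_add_months(datetime.datetime(2023, 1, 31), 1)
# datetime.datetime(2023, 2, 28, 0, 0)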
1904
1905 def datetime_round(dt, precision='day'):
1906 """
1907 Round a datetime object's time to a specific precision
1908 """
1909 if precision == 'microsecond':
1910 return dt
1911
1912 unit_seconds = {
1913 'day': 86400,
1914 'hour': 3600,
1915 'minute': 60,
1916 'second': 1,
1917 }
1918 roundto = lambda x, n: ((x + n / 2) // n) * n
1919 timestamp = calendar.timegm(dt.timetuple())
1920 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1921
1922
1923 def hyphenate_date(date_str):
1924 """
1925 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1926 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1927 if match is not None:
1928 return '-'.join(match.groups())
1929 else:
1930 return date_str
1931
1932
1933 class DateRange:
1934 """Represents a time interval between two dates"""
1935
1936 def __init__(self, start=None, end=None):
1937 """start and end must be strings in the format accepted by date"""
1938 if start is not None:
1939 self.start = date_from_str(start, strict=True)
1940 else:
1941 self.start = datetime.datetime.min.date()
1942 if end is not None:
1943 self.end = date_from_str(end, strict=True)
1944 else:
1945 self.end = datetime.datetime.max.date()
1946 if self.start > self.end:
1947 raise ValueError('Date range: "%s"; the start date must be before the end date' % self)
1948
1949 @classmethod
1950 def day(cls, day):
1951 """Returns a range that only contains the given day"""
1952 return cls(day, day)
1953
1954 def __contains__(self, date):
1955 """Check if the date is in the range"""
1956 if not isinstance(date, datetime.date):
1957 date = date_from_str(date)
1958 return self.start <= date <= self.end
1959
1960 def __str__(self):
1961 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1962
1963 def __eq__(self, other):
1964 return (isinstance(other, DateRange)
1965 and self.start == other.start and self.end == other.end)
1966
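# Illustrative examples (not part of the original module):
# >>> '20230115' in DateRange('20230101', '20230131')
# True
# >>> '20230201' in DateRange.day('20230115')
# False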
1967
1968 @functools.cache
1969 def system_identifier():
1970 python_implementation = platform.python_implementation()
1971 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
1972 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
1973 libc_ver = []
1974 with contextlib.suppress(OSError): # We may not have access to the executable
1975 libc_ver = platform.libc_ver()
1976
1977 return 'Python %s (%s %s %s) - %s (%s%s)' % (
1978 platform.python_version(),
1979 python_implementation,
1980 platform.machine(),
1981 platform.architecture()[0],
1982 platform.platform(),
1983 ssl.OPENSSL_VERSION,
1984 format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
1985 )
1986
1987
1988 @functools.cache
1989 def get_windows_version():
1990 ''' Get Windows version. Returns () if not running on Windows '''
1991 if compat_os_name == 'nt':
1992 return version_tuple(platform.win32_ver()[1])
1993 else:
1994 return ()
1995
1996
1997 def write_string(s, out=None, encoding=None):
1998 assert isinstance(s, str)
1999 out = out or sys.stderr
2000 # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
2001 if not out:
2002 return
2003
2004 if compat_os_name == 'nt' and supports_terminal_sequences(out):
2005 s = re.sub(r'([\r\n]+)', r' \1', s)
2006
2007 enc, buffer = None, out
2008 if 'b' in getattr(out, 'mode', ''):
2009 enc = encoding or preferredencoding()
2010 elif hasattr(out, 'buffer'):
2011 buffer = out.buffer
2012 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
2013
2014 buffer.write(s.encode(enc, 'ignore') if enc else s)
2015 out.flush()
2016
2017
2018 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
2019 from .. import _IN_CLI
2020 if _IN_CLI:
2021 if msg in deprecation_warning._cache:
2022 return
2023 deprecation_warning._cache.add(msg)
2024 if printer:
2025 return printer(f'{msg}{bug_reports_message()}', **kwargs)
2026 return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
2027 else:
2028 import warnings
2029 warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
2030
2031
2032 deprecation_warning._cache = set()
2033
2034
2035 def bytes_to_intlist(bs):
2036 if not bs:
2037 return []
2038 if isinstance(bs[0], int): # bytes-like input
2039 return list(bs)
2040 else:
2041 return [ord(c) for c in bs] # str input
2042
2043
2044 def intlist_to_bytes(xs):
2045 if not xs:
2046 return b''
2047 return struct.pack('%dB' % len(xs), *xs)
2048
2049
2050 class LockingUnsupportedError(OSError):
2051 msg = 'File locking is not supported'
2052
2053 def __init__(self):
2054 super().__init__(self.msg)
2055
2056
2057 # Cross-platform file locking
2058 if sys.platform == 'win32':
2059 import ctypes
2060 import ctypes.wintypes
2061 import msvcrt
2062
2063 class OVERLAPPED(ctypes.Structure):
2064 _fields_ = [
2065 ('Internal', ctypes.wintypes.LPVOID),
2066 ('InternalHigh', ctypes.wintypes.LPVOID),
2067 ('Offset', ctypes.wintypes.DWORD),
2068 ('OffsetHigh', ctypes.wintypes.DWORD),
2069 ('hEvent', ctypes.wintypes.HANDLE),
2070 ]
2071
2072 kernel32 = ctypes.WinDLL('kernel32')
2073 LockFileEx = kernel32.LockFileEx
2074 LockFileEx.argtypes = [
2075 ctypes.wintypes.HANDLE, # hFile
2076 ctypes.wintypes.DWORD, # dwFlags
2077 ctypes.wintypes.DWORD, # dwReserved
2078 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2079 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2080 ctypes.POINTER(OVERLAPPED) # Overlapped
2081 ]
2082 LockFileEx.restype = ctypes.wintypes.BOOL
2083 UnlockFileEx = kernel32.UnlockFileEx
2084 UnlockFileEx.argtypes = [
2085 ctypes.wintypes.HANDLE, # hFile
2086 ctypes.wintypes.DWORD, # dwReserved
2087 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2088 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2089 ctypes.POINTER(OVERLAPPED) # Overlapped
2090 ]
2091 UnlockFileEx.restype = ctypes.wintypes.BOOL
2092 whole_low = 0xffffffff
2093 whole_high = 0x7fffffff
2094
2095 def _lock_file(f, exclusive, block):
2096 overlapped = OVERLAPPED()
2097 overlapped.Offset = 0
2098 overlapped.OffsetHigh = 0
2099 overlapped.hEvent = 0
2100 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2101
2102 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2103 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2104 0, whole_low, whole_high, f._lock_file_overlapped_p):
2105 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2106 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2107
2108 def _unlock_file(f):
2109 assert f._lock_file_overlapped_p
2110 handle = msvcrt.get_osfhandle(f.fileno())
2111 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2112 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2113
2114 else:
2115 try:
2116 import fcntl
2117
2118 def _lock_file(f, exclusive, block):
2119 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2120 if not block:
2121 flags |= fcntl.LOCK_NB
2122 try:
2123 fcntl.flock(f, flags)
2124 except BlockingIOError:
2125 raise
2126 except OSError: # AOSP does not have flock()
2127 fcntl.lockf(f, flags)
2128
2129 def _unlock_file(f):
2130 with contextlib.suppress(OSError):
2131 return fcntl.flock(f, fcntl.LOCK_UN)
2132 with contextlib.suppress(OSError):
2133 return fcntl.lockf(f, fcntl.LOCK_UN) # AOSP does not have flock()
2134 return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB) # virtiofs needs LOCK_NB on unlocking
2135
2136 except ImportError:
2137
2138 def _lock_file(f, exclusive, block):
2139 raise LockingUnsupportedError()
2140
2141 def _unlock_file(f):
2142 raise LockingUnsupportedError()
2143
2144
2145 class locked_file:
2146 locked = False
2147
2148 def __init__(self, filename, mode, block=True, encoding=None):
2149 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2150 raise NotImplementedError(mode)
2151 self.mode, self.block = mode, block
2152
2153 writable = any(f in mode for f in 'wax+')
2154 readable = any(f in mode for f in 'r+')
2155 flags = functools.reduce(operator.ior, (
2156 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2157 getattr(os, 'O_BINARY', 0), # Windows only
2158 getattr(os, 'O_NOINHERIT', 0), # Windows only
2159 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2160 os.O_APPEND if 'a' in mode else 0,
2161 os.O_EXCL if 'x' in mode else 0,
2162 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2163 ))
2164
2165 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2166
2167 def __enter__(self):
2168 exclusive = 'r' not in self.mode
2169 try:
2170 _lock_file(self.f, exclusive, self.block)
2171 self.locked = True
2172 except OSError:
2173 self.f.close()
2174 raise
2175 if 'w' in self.mode:
2176 try:
2177 self.f.truncate()
2178 except OSError as e:
2179 if e.errno not in (
2180 errno.ESPIPE, # Illegal seek - expected for FIFO
2181 errno.EINVAL, # Invalid argument - expected for /dev/null
2182 ):
2183 raise
2184 return self
2185
2186 def unlock(self):
2187 if not self.locked:
2188 return
2189 try:
2190 _unlock_file(self.f)
2191 finally:
2192 self.locked = False
2193
2194 def __exit__(self, *_):
2195 try:
2196 self.unlock()
2197 finally:
2198 self.f.close()
2199
2200 open = __enter__
2201 close = __exit__
2202
2203 def __getattr__(self, attr):
2204 return getattr(self.f, attr)
2205
2206 def __iter__(self):
2207 return iter(self.f)
2208
2209
2210 @functools.cache
2211 def get_filesystem_encoding():
2212 encoding = sys.getfilesystemencoding()
2213 return encoding if encoding is not None else 'utf-8'
2214
2215
2216 def shell_quote(args):
2217 quoted_args = []
2218 encoding = get_filesystem_encoding()
2219 for a in args:
2220 if isinstance(a, bytes):
2221 # We may get a filename encoded with 'encodeFilename'
2222 a = a.decode(encoding)
2223 quoted_args.append(compat_shlex_quote(a))
2224 return ' '.join(quoted_args)
2225
2226
2227 def smuggle_url(url, data):
2228 """ Pass additional data in a URL for internal use. """
2229
2230 url, idata = unsmuggle_url(url, {})
2231 data.update(idata)
2232 sdata = urllib.parse.urlencode(
2233 {'__youtubedl_smuggle': json.dumps(data)})
2234 return url + '#' + sdata
2235
2236
2237 def unsmuggle_url(smug_url, default=None):
2238 if '#__youtubedl_smuggle' not in smug_url:
2239 return smug_url, default
2240 url, _, sdata = smug_url.rpartition('#')
2241 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2242 data = json.loads(jsond)
2243 return url, data
2244
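# Illustrative round-trip (not part of the original module):
# >>> url = smuggle_url('https://example.com/video', {'lang': 'en'})
# >>> unsmuggle_url(url)
# ('https://example.com/video', {'lang': 'en'})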
2245
2246 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2247 """ Formats numbers with decimal sufixes like K, M, etc """
2248 num, factor = float_or_none(num), float(factor)
2249 if num is None or num < 0:
2250 return None
2251 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2252 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2253 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2254 if factor == 1024:
2255 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2256 converted = num / (factor ** exponent)
2257 return fmt % (converted, suffix)
2258
2259
2260 def format_bytes(bytes):
2261 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2262
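# Illustrative examples (not part of the original module): with factor=1024
# the suffixes become binary (Ki, Mi, ...):
# >>> format_decimal_suffix(12345, '%d%s')
# '12k'
# >>> format_bytes(1536)
# '1.50KiB'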
2263
2264 def lookup_unit_table(unit_table, s, strict=False):
2265 num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
2266 units_re = '|'.join(re.escape(u) for u in unit_table)
2267 m = (re.fullmatch if strict else re.match)(
2268 rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
2269 if not m:
2270 return None
2271
2272 num = float(m.group('num').replace(',', '.'))
2273 mult = unit_table[m.group('unit')]
2274 return round(num * mult)
2275
2276
2277 def parse_bytes(s):
2278 """Parse a string indicating a byte quantity into an integer"""
2279 return lookup_unit_table(
2280 {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])},
2281 s.upper(), strict=True)
2282
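# Illustrative examples (not part of the original module): single-letter
# units are interpreted as binary multiples here:
# >>> parse_bytes('10K')
# 10240
# >>> parse_bytes('1M')
# 1048576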
2283
2284 def parse_filesize(s):
2285 if s is None:
2286 return None
2287
2288 # The lower-case forms are of course incorrect and unofficial,
2289 # but we support those too
2290 _UNIT_TABLE = {
2291 'B': 1,
2292 'b': 1,
2293 'bytes': 1,
2294 'KiB': 1024,
2295 'KB': 1000,
2296 'kB': 1024,
2297 'Kb': 1000,
2298 'kb': 1000,
2299 'kilobytes': 1000,
2300 'kibibytes': 1024,
2301 'MiB': 1024 ** 2,
2302 'MB': 1000 ** 2,
2303 'mB': 1024 ** 2,
2304 'Mb': 1000 ** 2,
2305 'mb': 1000 ** 2,
2306 'megabytes': 1000 ** 2,
2307 'mebibytes': 1024 ** 2,
2308 'GiB': 1024 ** 3,
2309 'GB': 1000 ** 3,
2310 'gB': 1024 ** 3,
2311 'Gb': 1000 ** 3,
2312 'gb': 1000 ** 3,
2313 'gigabytes': 1000 ** 3,
2314 'gibibytes': 1024 ** 3,
2315 'TiB': 1024 ** 4,
2316 'TB': 1000 ** 4,
2317 'tB': 1024 ** 4,
2318 'Tb': 1000 ** 4,
2319 'tb': 1000 ** 4,
2320 'terabytes': 1000 ** 4,
2321 'tebibytes': 1024 ** 4,
2322 'PiB': 1024 ** 5,
2323 'PB': 1000 ** 5,
2324 'pB': 1024 ** 5,
2325 'Pb': 1000 ** 5,
2326 'pb': 1000 ** 5,
2327 'petabytes': 1000 ** 5,
2328 'pebibytes': 1024 ** 5,
2329 'EiB': 1024 ** 6,
2330 'EB': 1000 ** 6,
2331 'eB': 1024 ** 6,
2332 'Eb': 1000 ** 6,
2333 'eb': 1000 ** 6,
2334 'exabytes': 1000 ** 6,
2335 'exbibytes': 1024 ** 6,
2336 'ZiB': 1024 ** 7,
2337 'ZB': 1000 ** 7,
2338 'zB': 1024 ** 7,
2339 'Zb': 1000 ** 7,
2340 'zb': 1000 ** 7,
2341 'zettabytes': 1000 ** 7,
2342 'zebibytes': 1024 ** 7,
2343 'YiB': 1024 ** 8,
2344 'YB': 1000 ** 8,
2345 'yB': 1024 ** 8,
2346 'Yb': 1000 ** 8,
2347 'yb': 1000 ** 8,
2348 'yottabytes': 1000 ** 8,
2349 'yobibytes': 1024 ** 8,
2350 }
2351
2352 return lookup_unit_table(_UNIT_TABLE, s)
2353
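# Illustrative examples (not part of the original module):
# >>> parse_filesize('1.2MiB')
# 1258291
# >>> parse_filesize('5 GB')
# 5000000000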
2354
2355 def parse_count(s):
2356 if s is None:
2357 return None
2358
2359 s = re.sub(r'^[^\d]+\s', '', s).strip()
2360
2361 if re.match(r'^[\d,.]+$', s):
2362 return str_to_int(s)
2363
2364 _UNIT_TABLE = {
2365 'k': 1000,
2366 'K': 1000,
2367 'm': 1000 ** 2,
2368 'M': 1000 ** 2,
2369 'kk': 1000 ** 2,
2370 'KK': 1000 ** 2,
2371 'b': 1000 ** 3,
2372 'B': 1000 ** 3,
2373 }
2374
2375 ret = lookup_unit_table(_UNIT_TABLE, s)
2376 if ret is not None:
2377 return ret
2378
2379 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2380 if mobj:
2381 return str_to_int(mobj.group(1))
2382
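# Illustrative examples (not part of the original module):
# >>> parse_count('1.8M')
# 1800000
# >>> parse_count('1,234 views')
# 1234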
2383
2384 def parse_resolution(s, *, lenient=False):
2385 if s is None:
2386 return {}
2387
2388 if lenient:
2389 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2390 else:
2391 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2392 if mobj:
2393 return {
2394 'width': int(mobj.group('w')),
2395 'height': int(mobj.group('h')),
2396 }
2397
2398 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2399 if mobj:
2400 return {'height': int(mobj.group(1))}
2401
2402 mobj = re.search(r'\b([48])[kK]\b', s)
2403 if mobj:
2404 return {'height': int(mobj.group(1)) * 540}
2405
2406 return {}
2407
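# Illustrative examples (not part of the original module):
# >>> parse_resolution('1920x1080')
# {'width': 1920, 'height': 1080}
# >>> parse_resolution('720p')
# {'height': 720}
# >>> parse_resolution('4k')
# {'height': 2160}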
2408
2409 def parse_bitrate(s):
2410 if not isinstance(s, str):
2411 return
2412 mobj = re.search(r'\b(\d+)\s*kbps', s)
2413 if mobj:
2414 return int(mobj.group(1))
2415
2416
2417 def month_by_name(name, lang='en'):
2418 """ Return the number of a month by (locale-independently) English name """
2419
2420 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2421
2422 try:
2423 return month_names.index(name) + 1
2424 except ValueError:
2425 return None
2426
2427
2428 def month_by_abbreviation(abbrev):
2429 """ Return the number of a month by (locale-independently) English
2430 abbreviations """
2431
2432 try:
2433 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2434 except ValueError:
2435 return None
2436
2437
2438 def fix_xml_ampersands(xml_str):
2439 """Replace all the '&' by '&amp;' in XML"""
2440 return re.sub(
2441 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2442 '&amp;',
2443 xml_str)
2444
2445
2446 def setproctitle(title):
2447 assert isinstance(title, str)
2448
2449 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2450 try:
2451 import ctypes
2452 except ImportError:
2453 return
2454
2455 try:
2456 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2457 except OSError:
2458 return
2459 except TypeError:
2460 # LoadLibrary in Windows Python 2.7.13 only expects
2461 # a bytestring, but since unicode_literals turns
2462 # every string into a unicode string, it fails.
2463 return
2464 title_bytes = title.encode()
2465 buf = ctypes.create_string_buffer(len(title_bytes))
2466 buf.value = title_bytes
2467 try:
2468 libc.prctl(15, buf, 0, 0, 0)
2469 except AttributeError:
2470 return # Strange libc, just skip this
2471
2472
2473 def remove_start(s, start):
2474 return s[len(start):] if s is not None and s.startswith(start) else s
2475
2476
2477 def remove_end(s, end):
2478 return s[:-len(end)] if s is not None and s.endswith(end) else s
2479
2480
2481 def remove_quotes(s):
2482 if s is None or len(s) < 2:
2483 return s
2484 for quote in ('"', "'", ):
2485 if s[0] == quote and s[-1] == quote:
2486 return s[1:-1]
2487 return s
2488
2489
2490 def get_domain(url):
2491 """
2492 This implementation is inconsistent, but is kept for compatibility.
2493 Use this only for "webpage_url_domain"
2494 """
2495 return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2496
2497
2498 def url_basename(url):
2499 path = urllib.parse.urlparse(url).path
2500 return path.strip('/').split('/')[-1]
2501
2502
2503 def base_url(url):
2504 return re.match(r'https?://[^?#]+/', url).group()
2505
2506
2507 def urljoin(base, path):
2508 if isinstance(path, bytes):
2509 path = path.decode()
2510 if not isinstance(path, str) or not path:
2511 return None
2512 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2513 return path
2514 if isinstance(base, bytes):
2515 base = base.decode()
2516 if not isinstance(base, str) or not re.match(
2517 r'^(?:https?:)?//', base):
2518 return None
2519 return urllib.parse.urljoin(base, path)
2520
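# Illustrative examples (not part of the original module): unlike
# urllib.parse.urljoin, this returns None for invalid input:
# >>> urljoin('https://example.com/a/', 'b/c.mp4')
# 'https://example.com/a/b/c.mp4'
# >>> urljoin(None, 'b/c.mp4')
# (returns None)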
2521
2522 class HEADRequest(urllib.request.Request):
2523 def get_method(self):
2524 return 'HEAD'
2525
2526
2527 class PUTRequest(urllib.request.Request):
2528 def get_method(self):
2529 return 'PUT'
2530
2531
2532 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2533 if get_attr and v is not None:
2534 v = getattr(v, get_attr, None)
2535 try:
2536 return int(v) * invscale // scale
2537 except (ValueError, TypeError, OverflowError):
2538 return default
2539
2540
2541 def str_or_none(v, default=None):
2542 return default if v is None else str(v)
2543
2544
2545 def str_to_int(int_str):
2546 """ A more relaxed version of int_or_none """
2547 if isinstance(int_str, int):
2548 return int_str
2549 elif isinstance(int_str, str):
2550 int_str = re.sub(r'[,\.\+]', '', int_str)
2551 return int_or_none(int_str)
2552
2553
2554 def float_or_none(v, scale=1, invscale=1, default=None):
2555 if v is None:
2556 return default
2557 try:
2558 return float(v) * invscale / scale
2559 except (ValueError, TypeError):
2560 return default
2561
2562
2563 def bool_or_none(v, default=None):
2564 return v if isinstance(v, bool) else default
2565
2566
2567 def strip_or_none(v, default=None):
2568 return v.strip() if isinstance(v, str) else default
2569
2570
2571 def url_or_none(url):
2572 if not url or not isinstance(url, str):
2573 return None
2574 url = url.strip()
2575 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2576
2577
2578 def request_to_url(req):
2579 if isinstance(req, urllib.request.Request):
2580 return req.get_full_url()
2581 else:
2582 return req
2583
2584
2585 def strftime_or_none(timestamp, date_format, default=None):
2586 datetime_object = None
2587 try:
2588 if isinstance(timestamp, (int, float)): # unix timestamp
2589 # Using naive datetime here can break timestamp() in Windows
2590 # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
2591 datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
2592 elif isinstance(timestamp, str): # assume YYYYMMDD
2593 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2594 date_format = re.sub( # Support %s on windows
2595 r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
2596 return datetime_object.strftime(date_format)
2597 except (ValueError, TypeError, AttributeError):
2598 return default
2599
2600
2601 def parse_duration(s):
2602 if not isinstance(s, str):
2603 return None
2604 s = s.strip()
2605 if not s:
2606 return None
2607
2608 days, hours, mins, secs, ms = [None] * 5
2609 m = re.match(r'''(?x)
2610 (?P<before_secs>
2611 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2612 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2613 (?P<ms>[.:][0-9]+)?Z?$
2614 ''', s)
2615 if m:
2616 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2617 else:
2618 m = re.match(
2619 r'''(?ix)(?:P?
2620 (?:
2621 [0-9]+\s*y(?:ears?)?,?\s*
2622 )?
2623 (?:
2624 [0-9]+\s*m(?:onths?)?,?\s*
2625 )?
2626 (?:
2627 [0-9]+\s*w(?:eeks?)?,?\s*
2628 )?
2629 (?:
2630 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2631 )?
2632 T)?
2633 (?:
2634 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2635 )?
2636 (?:
2637 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2638 )?
2639 (?:
2640 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2641 )?Z?$''', s)
2642 if m:
2643 days, hours, mins, secs, ms = m.groups()
2644 else:
2645 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2646 if m:
2647 hours, mins = m.groups()
2648 else:
2649 return None
2650
2651 if ms:
2652 ms = ms.replace(':', '.')
2653 return sum(float(part or 0) * mult for part, mult in (
2654 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2655
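# Illustrative examples (not part of the original module):
# >>> parse_duration('1:02:03')
# 3723.0
# >>> parse_duration('9 hours 3 minutes')
# 32580.0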
2656
2657 def prepend_extension(filename, ext, expected_real_ext=None):
2658 name, real_ext = os.path.splitext(filename)
2659 return (
2660 f'{name}.{ext}{real_ext}'
2661 if not expected_real_ext or real_ext[1:] == expected_real_ext
2662 else f'{filename}.{ext}')
2663
2664
2665 def replace_extension(filename, ext, expected_real_ext=None):
2666 name, real_ext = os.path.splitext(filename)
2667 return '{}.{}'.format(
2668 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2669 ext)
2670
2671
2672 def check_executable(exe, args=[]):
2673 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2674 args can be a list of arguments for a short output (like -version) """
2675 try:
2676 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2677 except OSError:
2678 return False
2679 return exe
2680
2681
2682 def _get_exe_version_output(exe, args):
2683 try:
2684 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2685 # SIGTTOU if yt-dlp is run in the background.
2686 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2687 stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True,
2688 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2689 if ret:
2690 return None
2691 except OSError:
2692 return False
2693 return stdout
2694
2695
2696 def detect_exe_version(output, version_re=None, unrecognized='present'):
2697 assert isinstance(output, str)
2698 if version_re is None:
2699 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2700 m = re.search(version_re, output)
2701 if m:
2702 return m.group(1)
2703 else:
2704 return unrecognized
2705
2706
2707 def get_exe_version(exe, args=['--version'],
2708 version_re=None, unrecognized=('present', 'broken')):
2709 """ Returns the version of the specified executable,
2710 or False if the executable is not present """
2711 unrecognized = variadic(unrecognized)
2712 assert len(unrecognized) in (1, 2)
2713 out = _get_exe_version_output(exe, args)
2714 if out is None:
2715 return unrecognized[-1]
2716 return out and detect_exe_version(out, version_re, unrecognized[0])
2717
2718
2719 def frange(start=0, stop=None, step=1):
2720 """Float range"""
2721 if stop is None:
2722 start, stop = 0, start
2723 sign = [-1, 1][step > 0] if step else 0
2724 while sign * start < sign * stop:
2725 yield start
2726 start += step
2727
2728
2729 class LazyList(collections.abc.Sequence):
2730 """Lazy immutable list from an iterable
2731 Note that slices of a LazyList are lists and not LazyLists"""
2732
2733 class IndexError(IndexError):
2734 pass
2735
2736 def __init__(self, iterable, *, reverse=False, _cache=None):
2737 self._iterable = iter(iterable)
2738 self._cache = [] if _cache is None else _cache
2739 self._reversed = reverse
2740
2741 def __iter__(self):
2742 if self._reversed:
2743 # We need to consume the entire iterable to iterate in reverse
2744 yield from self.exhaust()
2745 return
2746 yield from self._cache
2747 for item in self._iterable:
2748 self._cache.append(item)
2749 yield item
2750
2751 def _exhaust(self):
2752 self._cache.extend(self._iterable)
2753 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2754 return self._cache
2755
2756 def exhaust(self):
2757 """Evaluate the entire iterable"""
2758 return self._exhaust()[::-1 if self._reversed else 1]
2759
2760 @staticmethod
2761 def _reverse_index(x):
2762 return None if x is None else ~x
2763
2764 def __getitem__(self, idx):
2765 if isinstance(idx, slice):
2766 if self._reversed:
2767 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2768 start, stop, step = idx.start, idx.stop, idx.step or 1
2769 elif isinstance(idx, int):
2770 if self._reversed:
2771 idx = self._reverse_index(idx)
2772 start, stop, step = idx, idx, 0
2773 else:
2774 raise TypeError('indices must be integers or slices')
2775 if ((start or 0) < 0 or (stop or 0) < 0
2776 or (start is None and step < 0)
2777 or (stop is None and step > 0)):
2778 # We need to consume the entire iterable to be able to slice from the end
2779 # Obviously, never use this with infinite iterables
2780 self._exhaust()
2781 try:
2782 return self._cache[idx]
2783 except IndexError as e:
2784 raise self.IndexError(e) from e
2785 n = max(start or 0, stop or 0) - len(self._cache) + 1
2786 if n > 0:
2787 self._cache.extend(itertools.islice(self._iterable, n))
2788 try:
2789 return self._cache[idx]
2790 except IndexError as e:
2791 raise self.IndexError(e) from e
2792
2793 def __bool__(self):
2794 try:
2795 self[-1] if self._reversed else self[0]
2796 except self.IndexError:
2797 return False
2798 return True
2799
2800 def __len__(self):
2801 self._exhaust()
2802 return len(self._cache)
2803
2804 def __reversed__(self):
2805 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2806
2807 def __copy__(self):
2808 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2809
2810 def __repr__(self):
2811 # repr and str should mimic a list. So we exhaust the iterable
2812 return repr(self.exhaust())
2813
2814 def __str__(self):
2815 return repr(self.exhaust())
2816
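# Illustrative example (not part of the original module): items are pulled
# from the underlying iterable only as far as needed, and are cached:
# >>> lst = LazyList(itertools.count())
# >>> lst[10]
# 10
# >>> lst[:3]
# [0, 1, 2]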
2817
2818 class PagedList:
2819
2820 class IndexError(IndexError):
2821 pass
2822
2823 def __len__(self):
2824 # This is only useful for tests
2825 return len(self.getslice())
2826
2827 def __init__(self, pagefunc, pagesize, use_cache=True):
2828 self._pagefunc = pagefunc
2829 self._pagesize = pagesize
2830 self._pagecount = float('inf')
2831 self._use_cache = use_cache
2832 self._cache = {}
2833
2834 def getpage(self, pagenum):
2835 page_results = self._cache.get(pagenum)
2836 if page_results is None:
2837 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2838 if self._use_cache:
2839 self._cache[pagenum] = page_results
2840 return page_results
2841
2842 def getslice(self, start=0, end=None):
2843 return list(self._getslice(start, end))
2844
2845 def _getslice(self, start, end):
2846 raise NotImplementedError('This method must be implemented by subclasses')
2847
2848 def __getitem__(self, idx):
2849 assert self._use_cache, 'Indexing PagedList requires cache'
2850 if not isinstance(idx, int) or idx < 0:
2851 raise TypeError('indices must be non-negative integers')
2852 entries = self.getslice(idx, idx + 1)
2853 if not entries:
2854 raise self.IndexError()
2855 return entries[0]
2856
2857
2858 class OnDemandPagedList(PagedList):
2859 """Download pages until a page with less than maximum results"""
2860
2861 def _getslice(self, start, end):
2862 for pagenum in itertools.count(start // self._pagesize):
2863 firstid = pagenum * self._pagesize
2864 nextfirstid = pagenum * self._pagesize + self._pagesize
2865 if start >= nextfirstid:
2866 continue
2867
2868 startv = (
2869 start % self._pagesize
2870 if firstid <= start < nextfirstid
2871 else 0)
2872 endv = (
2873 ((end - 1) % self._pagesize) + 1
2874 if (end is not None and firstid <= end <= nextfirstid)
2875 else None)
2876
2877 try:
2878 page_results = self.getpage(pagenum)
2879 except Exception:
2880 self._pagecount = pagenum - 1
2881 raise
2882 if startv != 0 or endv is not None:
2883 page_results = page_results[startv:endv]
2884 yield from page_results
2885
2886 # A little optimization - if the current page is not "full", i.e. does
2887 # not contain page_size videos, then we can assume that this page
2888 # is the last one - there are no more ids on further pages -
2889 # so there is no need to query again.
2890 if len(page_results) + startv < self._pagesize:
2891 break
2892
2893 # If we got the whole page, but the next page is not interesting,
2894 # break out early as well
2895 if end == nextfirstid:
2896 break
2897
2898
2899 class InAdvancePagedList(PagedList):
2900 """PagedList with total number of pages known in advance"""
2901
2902 def __init__(self, pagefunc, pagecount, pagesize):
2903 PagedList.__init__(self, pagefunc, pagesize, True)
2904 self._pagecount = pagecount
2905
2906 def _getslice(self, start, end):
2907 start_page = start // self._pagesize
2908 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2909 skip_elems = start - start_page * self._pagesize
2910 only_more = None if end is None else end - start
2911 for pagenum in range(start_page, end_page):
2912 page_results = self.getpage(pagenum)
2913 if skip_elems:
2914 page_results = page_results[skip_elems:]
2915 skip_elems = None
2916 if only_more is not None:
2917 if len(page_results) < only_more:
2918 only_more -= len(page_results)
2919 else:
2920 yield from page_results[:only_more]
2921 break
2922 yield from page_results
2923
2924
2925 class PlaylistEntries:
2926 MissingEntry = object()
2927 is_exhausted = False
2928
2929 def __init__(self, ydl, info_dict):
2930 self.ydl = ydl
2931
2932 # _entries must be assigned now since infodict can change during iteration
2933 entries = info_dict.get('entries')
2934 if entries is None:
2935 raise EntryNotInPlaylist('There are no entries')
2936 elif isinstance(entries, list):
2937 self.is_exhausted = True
2938
2939 requested_entries = info_dict.get('requested_entries')
2940 self.is_incomplete = requested_entries is not None
2941 if self.is_incomplete:
2942 assert self.is_exhausted
2943 self._entries = [self.MissingEntry] * max(requested_entries or [0])
2944 for i, entry in zip(requested_entries, entries):
2945 self._entries[i - 1] = entry
2946 elif isinstance(entries, (list, PagedList, LazyList)):
2947 self._entries = entries
2948 else:
2949 self._entries = LazyList(entries)
2950
2951 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2952 (?P<start>[+-]?\d+)?
2953 (?P<range>[:-]
2954 (?P<end>[+-]?\d+|inf(?:inite)?)?
2955 (?::(?P<step>[+-]?\d+))?
2956 )?''')
2957
2958 @classmethod
2959 def parse_playlist_items(cls, string):
2960 for segment in string.split(','):
2961 if not segment:
2962 raise ValueError('There are two or more consecutive commas')
2963 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2964 if not mobj:
2965 raise ValueError(f'{segment!r} is not a valid specification')
2966 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2967 if int_or_none(step) == 0:
2968 raise ValueError(f'Step in {segment!r} cannot be zero')
2969 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2970
2971 def get_requested_items(self):
2972 playlist_items = self.ydl.params.get('playlist_items')
2973 playlist_start = self.ydl.params.get('playliststart', 1)
2974 playlist_end = self.ydl.params.get('playlistend')
2975 # For backwards compatibility, interpret -1 as whole list
2976 if playlist_end in (-1, None):
2977 playlist_end = ''
2978 if not playlist_items:
2979 playlist_items = f'{playlist_start}:{playlist_end}'
2980 elif playlist_start != 1 or playlist_end:
2981 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2982
2983 for index in self.parse_playlist_items(playlist_items):
2984 for i, entry in self[index]:
2985 yield i, entry
2986 if not entry:
2987 continue
2988 try:
2989 # The item may have just been added to archive. Don't break due to it
2990 if not self.ydl.params.get('lazy_playlist'):
2991 # TODO: Add auto-generated fields
2992 self.ydl._match_entry(entry, incomplete=True, silent=True)
2993 except (ExistingVideoReached, RejectedVideoReached):
2994 return
2995
2996 def get_full_count(self):
2997 if self.is_exhausted and not self.is_incomplete:
2998 return len(self)
2999 elif isinstance(self._entries, InAdvancePagedList):
3000 if self._entries._pagesize == 1:
3001 return self._entries._pagecount
3002
3003 @functools.cached_property
3004 def _getter(self):
3005 if isinstance(self._entries, list):
3006 def get_entry(i):
3007 try:
3008 entry = self._entries[i]
3009 except IndexError:
3010 entry = self.MissingEntry
3011 if not self.is_incomplete:
3012 raise self.IndexError()
3013 if entry is self.MissingEntry:
3014 raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
3015 return entry
3016 else:
3017 def get_entry(i):
3018 try:
3019 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
3020 except (LazyList.IndexError, PagedList.IndexError):
3021 raise self.IndexError()
3022 return get_entry
3023
3024 def __getitem__(self, idx):
3025 if isinstance(idx, int):
3026 idx = slice(idx, idx)
3027
3028 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3029 step = 1 if idx.step is None else idx.step
3030 if idx.start is None:
3031 start = 0 if step > 0 else len(self) - 1
3032 else:
3033 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
3034
3035 # NB: Do not call len(self) when idx == [:]
3036 if idx.stop is None:
3037 stop = 0 if step < 0 else float('inf')
3038 else:
3039 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
3040 stop += [-1, 1][step > 0]
3041
3042 for i in frange(start, stop, step):
3043 if i < 0:
3044 continue
3045 try:
3046 entry = self._getter(i)
3047 except self.IndexError:
3048 self.is_exhausted = True
3049 if step > 0:
3050 break
3051 continue
3052 yield i + 1, entry
3053
3054 def __len__(self):
3055 return len(tuple(self[:]))
3056
3057 class IndexError(IndexError):
3058 pass
3059
3060
3061 def uppercase_escape(s):
3062 unicode_escape = codecs.getdecoder('unicode_escape')
3063 return re.sub(
3064 r'\\U[0-9a-fA-F]{8}',
3065 lambda m: unicode_escape(m.group(0))[0],
3066 s)
3067
3068
3069 def lowercase_escape(s):
3070 unicode_escape = codecs.getdecoder('unicode_escape')
3071 return re.sub(
3072 r'\\u[0-9a-fA-F]{4}',
3073 lambda m: unicode_escape(m.group(0))[0],
3074 s)
3075
3076
3077 def escape_rfc3986(s):
3078 """Escape non-ASCII characters as suggested by RFC 3986"""
3079 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3080
3081
3082 def escape_url(url):
3083 """Escape URL as suggested by RFC 3986"""
3084 url_parsed = urllib.parse.urlparse(url)
3085 return url_parsed._replace(
3086 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3087 path=escape_rfc3986(url_parsed.path),
3088 params=escape_rfc3986(url_parsed.params),
3089 query=escape_rfc3986(url_parsed.query),
3090 fragment=escape_rfc3986(url_parsed.fragment)
3091 ).geturl()
3092
3093
3094 def parse_qs(url, **kwargs):
3095 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
3096
3097
3098 def read_batch_urls(batch_fd):
3099 def fixup(url):
3100 if not isinstance(url, str):
3101 url = url.decode('utf-8', 'replace')
3102 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3103 for bom in BOM_UTF8:
3104 if url.startswith(bom):
3105 url = url[len(bom):]
3106 url = url.lstrip()
3107 if not url or url.startswith(('#', ';', ']')):
3108 return False
3109 # "#" cannot be stripped out since it is part of the URI
3110 # However, it can safely be stripped out if it follows whitespace
3111 return re.split(r'\s#', url, 1)[0].rstrip()
3112
3113 with contextlib.closing(batch_fd) as fd:
3114 return [url for url in map(fixup, fd) if url]
3115
3116
3117 def urlencode_postdata(*args, **kargs):
3118 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3119
3120
3121 def update_url(url, *, query_update=None, **kwargs):
3122 """Replace URL components specified by kwargs
3123 @param url str or parsed URL tuple
3124 @param query_update dict of query parameters to add or update
3125 @returns str
3126 """
3127 if isinstance(url, str):
3128 if not kwargs and not query_update:
3129 return url
3130 else:
3131 url = urllib.parse.urlparse(url)
3132 if query_update:
3133 assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
3134 kwargs['query'] = urllib.parse.urlencode({
3135 **urllib.parse.parse_qs(url.query),
3136 **query_update
3137 }, True)
3138 return urllib.parse.urlunparse(url._replace(**kwargs))
3139
3140
3141 def update_url_query(url, query):
3142 return update_url(url, query_update=query)
3143
3144
3145 def update_Request(req, url=None, data=None, headers=None, query=None):
3146 req_headers = req.headers.copy()
3147 req_headers.update(headers or {})
3148 req_data = data or req.data
3149 req_url = update_url_query(url or req.get_full_url(), query)
3150 req_get_method = req.get_method()
3151 if req_get_method == 'HEAD':
3152 req_type = HEADRequest
3153 elif req_get_method == 'PUT':
3154 req_type = PUTRequest
3155 else:
3156 req_type = urllib.request.Request
3157 new_req = req_type(
3158 req_url, data=req_data, headers=req_headers,
3159 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3160 if hasattr(req, 'timeout'):
3161 new_req.timeout = req.timeout
3162 return new_req
3163
3164
3165 def _multipart_encode_impl(data, boundary):
3166 content_type = 'multipart/form-data; boundary=%s' % boundary
3167
3168 out = b''
3169 for k, v in data.items():
3170 out += b'--' + boundary.encode('ascii') + b'\r\n'
3171 if isinstance(k, str):
3172 k = k.encode()
3173 if isinstance(v, str):
3174 v = v.encode()
3175 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3176 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3177 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3178 if boundary.encode('ascii') in content:
3179 raise ValueError('Boundary overlaps with data')
3180 out += content
3181
3182 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3183
3184 return out, content_type
3185
3186
3187 def multipart_encode(data, boundary=None):
3188 '''
3189 Encode a dict to RFC 7578-compliant form-data
3190
3191 data:
3192 A dict where keys and values can be either Unicode or bytes-like
3193 objects.
3194 boundary:
3195 If specified, a Unicode object to use as the boundary. Otherwise,
3196 a random boundary is generated.
3197
3198 Reference: https://tools.ietf.org/html/rfc7578
3199 '''
3200 has_specified_boundary = boundary is not None
3201
3202 while True:
3203 if boundary is None:
3204 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3205
3206 try:
3207 out, content_type = _multipart_encode_impl(data, boundary)
3208 break
3209 except ValueError:
3210 if has_specified_boundary:
3211 raise
3212 boundary = None
3213
3214 return out, content_type
3215
3216
3217 def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
3218 if blocked_types is NO_DEFAULT:
3219 blocked_types = (str, bytes, collections.abc.Mapping)
3220 return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
3221
3222
3223 def variadic(x, allowed_types=NO_DEFAULT):
3224 return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
3225
3226
3227 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3228 for f in funcs:
3229 try:
3230 val = f(*args, **kwargs)
3231 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3232 pass
3233 else:
3234 if expected_type is None or isinstance(val, expected_type):
3235 return val
3236
3237
3238 def try_get(src, getter, expected_type=None):
3239 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3240
3241
3242 def filter_dict(dct, cndn=lambda _, v: v is not None):
3243 return {k: v for k, v in dct.items() if cndn(k, v)}
3244
3245
3246 def merge_dicts(*dicts):
3247 merged = {}
3248 for a_dict in dicts:
3249 for k, v in a_dict.items():
3250 if (v is not None and k not in merged
3251 or isinstance(v, str) and merged[k] == ''):
3252 merged[k] = v
3253 return merged
3254
3255
3256 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3257 return string if isinstance(string, str) else str(string, encoding, errors)
3258
3259
3260 US_RATINGS = {
3261 'G': 0,
3262 'PG': 10,
3263 'PG-13': 13,
3264 'R': 16,
3265 'NC': 18,
3266 }
3267
3268
3269 TV_PARENTAL_GUIDELINES = {
3270 'TV-Y': 0,
3271 'TV-Y7': 7,
3272 'TV-G': 0,
3273 'TV-PG': 0,
3274 'TV-14': 14,
3275 'TV-MA': 17,
3276 }
3277
3278
3279 def parse_age_limit(s):
3280 # isinstance(False, int) is True. So type() must be used instead
3281 if type(s) is int: # noqa: E721
3282 return s if 0 <= s <= 21 else None
3283 elif not isinstance(s, str):
3284 return None
3285 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3286 if m:
3287 return int(m.group('age'))
3288 s = s.upper()
3289 if s in US_RATINGS:
3290 return US_RATINGS[s]
3291 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3292 if m:
3293 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3294 return None
3295
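# Illustrative examples (not part of the original module):
# >>> parse_age_limit('PG-13')
# 13
# >>> parse_age_limit('TV-MA')
# 17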
3296
3297 def strip_jsonp(code):
3298 return re.sub(
3299 r'''(?sx)^
3300 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3301 (?:\s*&&\s*(?P=func_name))?
3302 \s*\(\s*(?P<callback_data>.*)\);?
3303 \s*?(?://[^\n]*)*$''',
3304 r'\g<callback_data>', code)
3305
3306
3307 def js_to_json(code, vars={}, *, strict=False):
3308 # vars is a dict of var, val pairs to substitute
3309 STRING_QUOTES = '\'"`'
3310 STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
3311 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3312 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3313 INTEGER_TABLE = (
3314 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3315 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3316 )
3317
3318 def process_escape(match):
3319 JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
3320 escape = match.group(1) or match.group(2)
3321
3322 return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
3323 else R'\u00' if escape == 'x'
3324 else '' if escape == '\n'
3325 else escape)
3326
3327 def template_substitute(match):
3328 evaluated = js_to_json(match.group(1), vars, strict=strict)
3329 if evaluated[0] == '"':
3330 return json.loads(evaluated)
3331 return evaluated
3332
3333 def fix_kv(m):
3334 v = m.group(0)
3335 if v in ('true', 'false', 'null'):
3336 return v
3337 elif v in ('undefined', 'void 0'):
3338 return 'null'
3339 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3340 return ''
3341
3342 if v[0] in STRING_QUOTES:
3343 v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
3344 escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
3345 return f'"{escaped}"'
3346
3347 for regex, base in INTEGER_TABLE:
3348 im = re.match(regex, v)
3349 if im:
3350 i = int(im.group(1), base)
3351 return f'"{i}":' if v.endswith(':') else str(i)
3352
3353 if v in vars:
3354 try:
3355 if not strict:
3356 json.loads(vars[v])
3357 except json.JSONDecodeError:
3358 return json.dumps(vars[v])
3359 else:
3360 return vars[v]
3361
3362 if not strict:
3363 return f'"{v}"'
3364
3365 raise ValueError(f'Unknown value: {v}')
3366
3367 def create_map(mobj):
3368 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3369
3370 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3371 if not strict:
3372 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3373 code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
3374 code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
3375 code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
3376
3377 return re.sub(rf'''(?sx)
3378 {STRING_RE}|
3379 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
3380 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3381 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
3382 [0-9]+(?={SKIP_RE}:)|
3383 !+
3384 ''', fix_kv, code)
3385
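# Illustrative examples (not part of the original module): unquoted keys,
# single-quoted strings and hex literals are converted to valid JSON:
# >>> js_to_json("{abc: 'def'}")
# '{"abc": "def"}'
# >>> js_to_json('{"a": 0x1a}')
# '{"a": 26}'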
3386
3387 def qualities(quality_ids):
3388 """ Get a numeric quality value out of a list of possible values """
3389 def q(qid):
3390 try:
3391 return quality_ids.index(qid)
3392 except ValueError:
3393 return -1
3394 return q
3395
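# Illustrative example (not part of the original module): unknown quality
# ids map to -1:
# >>> q = qualities(['240p', '360p', '720p'])
# >>> q('720p'), q('1080p')
# (2, -1)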
3396
3397 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3398
3399
3400 DEFAULT_OUTTMPL = {
3401 'default': '%(title)s [%(id)s].%(ext)s',
3402 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3403 }
3404 OUTTMPL_TYPES = {
3405 'chapter': None,
3406 'subtitle': None,
3407 'thumbnail': None,
3408 'description': 'description',
3409 'annotation': 'annotations.xml',
3410 'infojson': 'info.json',
3411 'link': None,
3412 'pl_video': None,
3413 'pl_thumbnail': None,
3414 'pl_description': 'description',
3415 'pl_infojson': 'info.json',
3416 }
3417
3418 # As of [1] format syntax is:
3419 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3420 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3421 STR_FORMAT_RE_TMPL = r'''(?x)
3422 (?<!%)(?P<prefix>(?:%%)*)
3423 %
3424 (?P<has_key>\((?P<key>{0})\))?
3425 (?P<format>
3426 (?P<conversion>[#0\-+ ]+)?
3427 (?P<min_width>\d+)?
3428 (?P<precision>\.\d+)?
3429 (?P<len_mod>[hlL])? # unused in python
3430 {1} # conversion type
3431 )
3432 '''
3433
3434
3435 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3436
3437
3438 def limit_length(s, length):
3439 """ Add ellipses to overly long strings """
3440 if s is None:
3441 return None
3442 ELLIPSES = '...'
3443 if len(s) > length:
3444 return s[:length - len(ELLIPSES)] + ELLIPSES
3445 return s
3446
3447
3448 def version_tuple(v):
3449 return tuple(int(e) for e in re.split(r'[-.]', v))
3450
3451
3452 def is_outdated_version(version, limit, assume_new=True):
3453 if not version:
3454 return not assume_new
3455 try:
3456 return version_tuple(version) < version_tuple(limit)
3457 except ValueError:
3458 return not assume_new
3459
3460
3461 def ytdl_is_updateable():
3462 """ Returns if yt-dlp can be updated with -U """
3463
3464 from ..update import is_non_updateable
3465
3466 return not is_non_updateable()
3467
3468
3469 def args_to_str(args):
3470 # Get a short string representation for a subprocess command
3471 return ' '.join(compat_shlex_quote(a) for a in args)
3472
3473
3474 def error_to_str(err):
3475 return f'{type(err).__name__}: {err}'
3476
3477
3478 def mimetype2ext(mt, default=NO_DEFAULT):
3479 if not isinstance(mt, str):
3480 if default is not NO_DEFAULT:
3481 return default
3482 return None
3483
3484 MAP = {
3485 # video
3486 '3gpp': '3gp',
3487 'mp2t': 'ts',
3488 'mp4': 'mp4',
3489 'mpeg': 'mpeg',
3490 'mpegurl': 'm3u8',
3491 'quicktime': 'mov',
3492 'webm': 'webm',
3493 'vp9': 'vp9',
3494 'x-flv': 'flv',
3495 'x-m4v': 'm4v',
3496 'x-matroska': 'mkv',
3497 'x-mng': 'mng',
3498 'x-mp4-fragmented': 'mp4',
3499 'x-ms-asf': 'asf',
3500 'x-ms-wmv': 'wmv',
3501 'x-msvideo': 'avi',
3502
3503 # application (streaming playlists)
3504 'dash+xml': 'mpd',
3505 'f4m+xml': 'f4m',
3506 'hds+xml': 'f4m',
3507 'vnd.apple.mpegurl': 'm3u8',
3508 'vnd.ms-sstr+xml': 'ism',
3509 'x-mpegurl': 'm3u8',
3510
3511 # audio
3512 'audio/mp4': 'm4a',
3513 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
3514 # Using .mp3 as it's the most popular one
3515 'audio/mpeg': 'mp3',
3516 'audio/webm': 'webm',
3517 'audio/x-matroska': 'mka',
3518 'audio/x-mpegurl': 'm3u',
3519 'midi': 'mid',
3520 'ogg': 'ogg',
3521 'wav': 'wav',
3522 'wave': 'wav',
3523 'x-aac': 'aac',
3524 'x-flac': 'flac',
3525 'x-m4a': 'm4a',
3526 'x-realaudio': 'ra',
3527 'x-wav': 'wav',
3528
3529 # image
3530 'avif': 'avif',
3531 'bmp': 'bmp',
3532 'gif': 'gif',
3533 'jpeg': 'jpg',
3534 'png': 'png',
3535 'svg+xml': 'svg',
3536 'tiff': 'tif',
3537 'vnd.wap.wbmp': 'wbmp',
3538 'webp': 'webp',
3539 'x-icon': 'ico',
3540 'x-jng': 'jng',
3541 'x-ms-bmp': 'bmp',
3542
3543 # caption
3544 'filmstrip+json': 'fs',
3545 'smptett+xml': 'tt',
3546 'ttaf+xml': 'dfxp',
3547 'ttml+xml': 'ttml',
3548 'x-ms-sami': 'sami',
3549
3550 # misc
3551 'gzip': 'gz',
3552 'json': 'json',
3553 'xml': 'xml',
3554 'zip': 'zip',
3555 }
3556
3557 mimetype = mt.partition(';')[0].strip().lower()
3558 _, _, subtype = mimetype.rpartition('/')
3559
3560 ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
3561 if ext:
3562 return ext
3563 elif default is not NO_DEFAULT:
3564 return default
3565 return subtype.replace('+', '.')
3566
3567
3568 def ext2mimetype(ext_or_url):
3569 if not ext_or_url:
3570 return None
3571 if '.' not in ext_or_url:
3572 ext_or_url = f'file.{ext_or_url}'
3573 return mimetypes.guess_type(ext_or_url)[0]
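# Illustrative examples (not part of the original source) for the two helpers above:
#   >>> mimetype2ext('application/vnd.apple.mpegurl; charset=UTF-8')
#   'm3u8'
#   >>> mimetype2ext('video/x-matroska')
#   'mkv'
#   >>> ext2mimetype('mp4')
#   'video/mp4'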
3574
3575
3576 def parse_codecs(codecs_str):
3577 # http://tools.ietf.org/html/rfc6381
3578 if not codecs_str:
3579 return {}
3580 split_codecs = list(filter(None, map(
3581 str.strip, codecs_str.strip().strip(',').split(','))))
3582 vcodec, acodec, scodec, hdr = None, None, None, None
3583 for full_codec in split_codecs:
3584 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3585 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3586 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3587 if vcodec:
3588 continue
3589 vcodec = full_codec
3590 if parts[0] in ('dvh1', 'dvhe'):
3591 hdr = 'DV'
3592 elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
3593 hdr = 'HDR10'
3594 elif parts[:2] == ['vp9', '2']:
3595 hdr = 'HDR10'
3596 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
3597 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3598 acodec = acodec or full_codec
3599 elif parts[0] in ('stpp', 'wvtt'):
3600 scodec = scodec or full_codec
3601 else:
3602 write_string(f'WARNING: Unknown codec {full_codec}\n')
3603 if vcodec or acodec or scodec:
3604 return {
3605 'vcodec': vcodec or 'none',
3606 'acodec': acodec or 'none',
3607 'dynamic_range': hdr,
3608 **({'scodec': scodec} if scodec is not None else {}),
3609 }
3610 elif len(split_codecs) == 2:
3611 return {
3612 'vcodec': split_codecs[0],
3613 'acodec': split_codecs[1],
3614 }
3615 return {}
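# Illustrative examples (not part of the original source); note that Dolby
# Vision and 10-bit AV1/VP9 profiles also set 'dynamic_range':
#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
#   >>> parse_codecs('dvh1.05.01')
#   {'vcodec': 'dvh1.05.01', 'acodec': 'none', 'dynamic_range': 'DV'}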
3616
3617
3618 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3619 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3620
3621 allow_mkv = not preferences or 'mkv' in preferences
3622
3623 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3624 return 'mkv' # TODO: any other format allows this?
3625
3626 # TODO: Not all codecs supported by parse_codecs are handled here
3627 COMPATIBLE_CODECS = {
3628 'mp4': {
3629 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
3630 'h264', 'aacl', 'ec-3', # Set in ISM
3631 },
3632 'webm': {
3633 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3634 'vp9x', 'vp8x', # in the webm spec
3635 },
3636 }
3637
3638 sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3639 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3640
3641 for ext in preferences or COMPATIBLE_CODECS.keys():
3642 codec_set = COMPATIBLE_CODECS.get(ext, set())
3643 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3644 return ext
3645
3646 COMPATIBLE_EXTS = (
3647 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3648 {'webm', 'weba'},
3649 )
3650 for ext in preferences or vexts:
3651 current_exts = {ext, *vexts, *aexts}
3652 if ext == 'mkv' or current_exts == {ext} or any(
3653 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3654 return ext
3655 return 'mkv' if allow_mkv else preferences[-1]
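# Illustrative example (not part of the original source): H.264 video plus
# AAC audio can be merged into mp4 without falling back to mkv:
#   >>> get_compatible_ext(vcodecs=['avc1.64001f'], acodecs=['mp4a.40.2'],
#   ...                    vexts=['mp4'], aexts=['m4a'])
#   'mp4'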
3656
3657
3658 def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
3659 getheader = url_handle.headers.get
3660
3661 cd = getheader('Content-Disposition')
3662 if cd:
3663 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3664 if m:
3665 e = determine_ext(m.group('filename'), default_ext=None)
3666 if e:
3667 return e
3668
3669 meta_ext = getheader('x-amz-meta-name')
3670 if meta_ext:
3671 e = meta_ext.rpartition('.')[2]
3672 if e:
3673 return e
3674
3675 return mimetype2ext(getheader('Content-Type'), default=default)
3676
3677
3678 def encode_data_uri(data, mime_type):
3679 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
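# Illustrative example (not part of the original source):
#   >>> encode_data_uri(b'hi', 'text/plain')
#   'data:text/plain;base64,aGk='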
3680
3681
3682 def age_restricted(content_limit, age_limit):
3683 """ Returns True iff the content should be blocked """
3684
3685 if age_limit is None: # No limit set
3686 return False
3687 if content_limit is None:
3688 return False # Content available for everyone
3689 return age_limit < content_limit
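# Illustrative examples (not part of the original source):
#   >>> age_restricted(content_limit=18, age_limit=16)
#   True
#   >>> age_restricted(content_limit=18, age_limit=None)
#   False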
3690
3691
3692 # List of known byte-order-marks (BOM)
3693 BOMS = [
3694 (b'\xef\xbb\xbf', 'utf-8'),
3695 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3696 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3697 (b'\xff\xfe', 'utf-16-le'),
3698 (b'\xfe\xff', 'utf-16-be'),
3699 ]
3700
3701
3702 def is_html(first_bytes):
3703 """ Detect whether a file contains HTML by examining its first bytes. """
3704
3705 encoding = 'utf-8'
3706 for bom, enc in BOMS:
3707 while first_bytes.startswith(bom):
3708 encoding, first_bytes = enc, first_bytes[len(bom):]
3709
3710 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
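# Illustrative examples (not part of the original source); the function
# returns a match object (or None), hence the bool():
#   >>> bool(is_html(b'\xef\xbb\xbf  <!DOCTYPE html><html>'))
#   True
#   >>> bool(is_html(b'{"formats": []}'))
#   False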
3711
3712
3713 def determine_protocol(info_dict):
3714 protocol = info_dict.get('protocol')
3715 if protocol is not None:
3716 return protocol
3717
3718 url = sanitize_url(info_dict['url'])
3719 if url.startswith('rtmp'):
3720 return 'rtmp'
3721 elif url.startswith('mms'):
3722 return 'mms'
3723 elif url.startswith('rtsp'):
3724 return 'rtsp'
3725
3726 ext = determine_ext(url)
3727 if ext == 'm3u8':
3728 return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
3729 elif ext == 'f4m':
3730 return 'f4m'
3731
3732 return urllib.parse.urlparse(url).scheme
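# Illustrative examples (not part of the original source):
#   >>> determine_protocol({'url': 'https://example.com/master.m3u8'})
#   'm3u8_native'
#   >>> determine_protocol({'url': 'rtmp://example.com/live'})
#   'rtmp'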
3733
3734
3735 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3736 """ Render a list of rows, each as a list of values.
3737 Text after a \t will be right aligned """
3738 def width(string):
3739 return len(remove_terminal_sequences(string).replace('\t', ''))
3740
3741 def get_max_lens(table):
3742 return [max(width(str(v)) for v in col) for col in zip(*table)]
3743
3744 def filter_using_list(row, filterArray):
3745 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3746
3747 max_lens = get_max_lens(data) if hide_empty else []
3748 header_row = filter_using_list(header_row, max_lens)
3749 data = [filter_using_list(row, max_lens) for row in data]
3750
3751 table = [header_row] + data
3752 max_lens = get_max_lens(table)
3753 extra_gap += 1
3754 if delim:
3755 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3756 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3757 for row in table:
3758 for pos, text in enumerate(map(str, row)):
3759 if '\t' in text:
3760 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3761 else:
3762 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3763 ret = '\n'.join(''.join(row).rstrip() for row in table)
3764 return ret
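# Illustrative example (not part of the original source):
#   >>> print(render_table(['ID', 'NAME'], [['1', 'foo'], ['20', 'bar']]))
#   ID NAME
#   1  foo
#   20 bar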
3765
3766
3767 def _match_one(filter_part, dct, incomplete):
3768 # TODO: Generalize code with YoutubeDL._build_format_filter
3769 STRING_OPERATORS = {
3770 '*=': operator.contains,
3771 '^=': lambda attr, value: attr.startswith(value),
3772 '$=': lambda attr, value: attr.endswith(value),
3773 '~=': lambda attr, value: re.search(value, attr),
3774 }
3775 COMPARISON_OPERATORS = {
3776 **STRING_OPERATORS,
3777 '<=': operator.le, # "<=" must be defined above "<"
3778 '<': operator.lt,
3779 '>=': operator.ge,
3780 '>': operator.gt,
3781 '=': operator.eq,
3782 }
3783
3784 if isinstance(incomplete, bool):
3785 is_incomplete = lambda _: incomplete
3786 else:
3787 is_incomplete = lambda k: k in incomplete
3788
3789 operator_rex = re.compile(r'''(?x)
3790 (?P<key>[a-z_]+)
3791 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3792 (?:
3793 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3794 (?P<strval>.+?)
3795 )
3796 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3797 m = operator_rex.fullmatch(filter_part.strip())
3798 if m:
3799 m = m.groupdict()
3800 unnegated_op = COMPARISON_OPERATORS[m['op']]
3801 if m['negation']:
3802 op = lambda attr, value: not unnegated_op(attr, value)
3803 else:
3804 op = unnegated_op
3805 comparison_value = m['quotedstrval'] or m['strval']  # the regex defines no 'intval' group
3806 if m['quote']:
3807 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3808 actual_value = dct.get(m['key'])
3809 numeric_comparison = None
3810 if isinstance(actual_value, (int, float)):
3811 # If the original field is a string and the matching comparison value is
3812 # a number, we should respect the origin of the original field
3813 # and process the comparison value as a string (see
3814 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3815 try:
3816 numeric_comparison = int(comparison_value)
3817 except ValueError:
3818 numeric_comparison = parse_filesize(comparison_value)
3819 if numeric_comparison is None:
3820 numeric_comparison = parse_filesize(f'{comparison_value}B')
3821 if numeric_comparison is None:
3822 numeric_comparison = parse_duration(comparison_value)
3823 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3824 raise ValueError('Operator %s only supports string values!' % m['op'])
3825 if actual_value is None:
3826 return is_incomplete(m['key']) or m['none_inclusive']
3827 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3828
3829 UNARY_OPERATORS = {
3830 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3831 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3832 }
3833 operator_rex = re.compile(r'''(?x)
3834 (?P<op>%s)\s*(?P<key>[a-z_]+)
3835 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3836 m = operator_rex.fullmatch(filter_part.strip())
3837 if m:
3838 op = UNARY_OPERATORS[m.group('op')]
3839 actual_value = dct.get(m.group('key'))
3840 if is_incomplete(m.group('key')) and actual_value is None:
3841 return True
3842 return op(actual_value)
3843
3844 raise ValueError('Invalid filter part %r' % filter_part)
3845
3846
3847 def match_str(filter_str, dct, incomplete=False):
3848 """ Filter a dictionary with a simple string syntax.
3849 @returns Whether the filter passes
3850 @param incomplete Set of keys that is expected to be missing from dct.
3851 Can be True/False to indicate all/none of the keys may be missing.
3852 All conditions on incomplete keys pass if the key is missing
3853 """
3854 return all(
3855 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3856 for filter_part in re.split(r'(?<!\\)&', filter_str))
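# Illustrative examples (not part of the original source) of the filter
# syntax, as used by e.g. --match-filters:
#   >>> match_str('duration > 60 & description', {'duration': 90, 'description': 'x'})
#   True
#   >>> match_str('!is_live & like_count >? 100', {'is_live': False})
#   True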
3857
3858
3859 def match_filter_func(filters, breaking_filters=None):
3860 if not filters and not breaking_filters:
3861 return None
3862 breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
3863 filters = set(variadic(filters or []))
3864
3865 interactive = '-' in filters
3866 if interactive:
3867 filters.remove('-')
3868
3869 def _match_func(info_dict, incomplete=False):
3870 ret = breaking_filters(info_dict, incomplete)
3871 if ret is not None:
3872 raise RejectedVideoReached(ret)
3873
3874 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3875 return NO_DEFAULT if interactive and not incomplete else None
3876 else:
3877 video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3878 filter_str = ') | ('.join(map(str.strip, filters))
3879 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3880 return _match_func
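# Illustrative sketch (not part of the original source): the returned function
# gives None when an entry passes and a human-readable skip reason otherwise:
#   >>> f = match_filter_func('duration < 60')
#   >>> f({'duration': 30}) is None
#   True
#   >>> 'does not pass filter' in f({'duration': 90})
#   True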
3881
3882
3883 class download_range_func:
3884 def __init__(self, chapters, ranges):
3885 self.chapters, self.ranges = chapters, ranges
3886
3887 def __call__(self, info_dict, ydl):
3888 if not self.ranges and not self.chapters:
3889 yield {}
3890
3891 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3892 else 'Cannot match chapters since chapter information is unavailable')
3893 for regex in self.chapters or []:
3894 for i, chapter in enumerate(info_dict.get('chapters') or []):
3895 if re.search(regex, chapter['title']):
3896 warning = None
3897 yield {**chapter, 'index': i}
3898 if self.chapters and warning:
3899 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3900
3901 yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3902
3903 def __eq__(self, other):
3904 return (isinstance(other, download_range_func)
3905 and self.chapters == other.chapters and self.ranges == other.ranges)
3906
3907 def __repr__(self):
3908 return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges})'
3909
3910
3911 def parse_dfxp_time_expr(time_expr):
3912 if not time_expr:
3913 return
3914
3915 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3916 if mobj:
3917 return float(mobj.group('time_offset'))
3918
3919 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3920 if mobj:
3921 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3922
3923
3924 def srt_subtitles_timecode(seconds):
3925 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3926
3927
3928 def ass_subtitles_timecode(seconds):
3929 time = timetuple_from_msec(seconds * 1000)
3930 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
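# Illustrative examples (not part of the original source) for the three
# helpers above:
#   >>> parse_dfxp_time_expr('1:02:03.5')
#   3723.5
#   >>> parse_dfxp_time_expr('90s')
#   90.0
#   >>> srt_subtitles_timecode(3661.5)
#   '01:01:01,500'
#   >>> ass_subtitles_timecode(3661.5)
#   '1:01:01.50'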
3931
3932
3933 def dfxp2srt(dfxp_data):
3934 '''
3935 @param dfxp_data A bytes-like object containing DFXP data
3936 @returns A unicode object containing converted SRT data
3937 '''
3938 LEGACY_NAMESPACES = (
3939 (b'http://www.w3.org/ns/ttml', [
3940 b'http://www.w3.org/2004/11/ttaf1',
3941 b'http://www.w3.org/2006/04/ttaf1',
3942 b'http://www.w3.org/2006/10/ttaf1',
3943 ]),
3944 (b'http://www.w3.org/ns/ttml#styling', [
3945 b'http://www.w3.org/ns/ttml#style',
3946 ]),
3947 )
3948
3949 SUPPORTED_STYLING = [
3950 'color',
3951 'fontFamily',
3952 'fontSize',
3953 'fontStyle',
3954 'fontWeight',
3955 'textDecoration'
3956 ]
3957
3958 _x = functools.partial(xpath_with_ns, ns_map={
3959 'xml': 'http://www.w3.org/XML/1998/namespace',
3960 'ttml': 'http://www.w3.org/ns/ttml',
3961 'tts': 'http://www.w3.org/ns/ttml#styling',
3962 })
3963
3964 styles = {}
3965 default_style = {}
3966
3967 class TTMLPElementParser:
3968 _out = ''
3969 _unclosed_elements = []
3970 _applied_styles = []
3971
3972 def start(self, tag, attrib):
3973 if tag in (_x('ttml:br'), 'br'):
3974 self._out += '\n'
3975 else:
3976 unclosed_elements = []
3977 style = {}
3978 element_style_id = attrib.get('style')
3979 if default_style:
3980 style.update(default_style)
3981 if element_style_id:
3982 style.update(styles.get(element_style_id, {}))
3983 for prop in SUPPORTED_STYLING:
3984 prop_val = attrib.get(_x('tts:' + prop))
3985 if prop_val:
3986 style[prop] = prop_val
3987 if style:
3988 font = ''
3989 for k, v in sorted(style.items()):
3990 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3991 continue
3992 if k == 'color':
3993 font += ' color="%s"' % v
3994 elif k == 'fontSize':
3995 font += ' size="%s"' % v
3996 elif k == 'fontFamily':
3997 font += ' face="%s"' % v
3998 elif k == 'fontWeight' and v == 'bold':
3999 self._out += '<b>'
4000 unclosed_elements.append('b')
4001 elif k == 'fontStyle' and v == 'italic':
4002 self._out += '<i>'
4003 unclosed_elements.append('i')
4004 elif k == 'textDecoration' and v == 'underline':
4005 self._out += '<u>'
4006 unclosed_elements.append('u')
4007 if font:
4008 self._out += '<font' + font + '>'
4009 unclosed_elements.append('font')
4010 applied_style = {}
4011 if self._applied_styles:
4012 applied_style.update(self._applied_styles[-1])
4013 applied_style.update(style)
4014 self._applied_styles.append(applied_style)
4015 self._unclosed_elements.append(unclosed_elements)
4016
4017 def end(self, tag):
4018 if tag not in (_x('ttml:br'), 'br'):
4019 unclosed_elements = self._unclosed_elements.pop()
4020 for element in reversed(unclosed_elements):
4021 self._out += '</%s>' % element
4022 if unclosed_elements and self._applied_styles:
4023 self._applied_styles.pop()
4024
4025 def data(self, data):
4026 self._out += data
4027
4028 def close(self):
4029 return self._out.strip()
4030
4031 # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
4032 # This will not trigger false positives since only UTF-8 text is being replaced
4033 dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
4034
4035 def parse_node(node):
4036 target = TTMLPElementParser()
4037 parser = xml.etree.ElementTree.XMLParser(target=target)
4038 parser.feed(xml.etree.ElementTree.tostring(node))
4039 return parser.close()
4040
4041 for k, v in LEGACY_NAMESPACES:
4042 for ns in v:
4043 dfxp_data = dfxp_data.replace(ns, k)
4044
4045 dfxp = compat_etree_fromstring(dfxp_data)
4046 out = []
4047 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4048
4049 if not paras:
4050 raise ValueError('Invalid dfxp/TTML subtitle')
4051
4052 repeat = False
4053 while True:
4054 for style in dfxp.findall(_x('.//ttml:style')):
4055 style_id = style.get('id') or style.get(_x('xml:id'))
4056 if not style_id:
4057 continue
4058 parent_style_id = style.get('style')
4059 if parent_style_id:
4060 if parent_style_id not in styles:
4061 repeat = True
4062 continue
4063 styles[style_id] = styles[parent_style_id].copy()
4064 for prop in SUPPORTED_STYLING:
4065 prop_val = style.get(_x('tts:' + prop))
4066 if prop_val:
4067 styles.setdefault(style_id, {})[prop] = prop_val
4068 if repeat:
4069 repeat = False
4070 else:
4071 break
4072
4073 for p in ('body', 'div'):
4074 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4075 if ele is None:
4076 continue
4077 style = styles.get(ele.get('style'))
4078 if not style:
4079 continue
4080 default_style.update(style)
4081
4082 for para, index in zip(paras, itertools.count(1)):
4083 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4084 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4085 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4086 if begin_time is None:
4087 continue
4088 if not end_time:
4089 if not dur:
4090 continue
4091 end_time = begin_time + dur
4092 out.append('%d\n%s --> %s\n%s\n\n' % (
4093 index,
4094 srt_subtitles_timecode(begin_time),
4095 srt_subtitles_timecode(end_time),
4096 parse_node(para)))
4097
4098 return ''.join(out)
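# A minimal, illustrative round trip (not part of the original source; the
# TTML snippet is made up):
#   >>> data = (b'<tt xmlns="http://www.w3.org/ns/ttml"><body><div>'
#   ...         b'<p begin="0s" end="1.5s">Hello</p></div></body></tt>')
#   >>> dfxp2srt(data) == '1\n00:00:00,000 --> 00:00:01,500\nHello\n\n'
#   True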
4099
4100
4101 def cli_option(params, command_option, param, separator=None):
4102 param = params.get(param)
4103 return ([] if param is None
4104 else [command_option, str(param)] if separator is None
4105 else [f'{command_option}{separator}{param}'])
4106
4107
4108 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4109 param = params.get(param)
4110 assert param in (True, False, None)
4111 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
4112
4113
4114 def cli_valueless_option(params, command_option, param, expected_value=True):
4115 return [command_option] if params.get(param) == expected_value else []
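# Illustrative examples (not part of the original source) for the three cli
# helpers above:
#   >>> cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', '127.0.0.1:3128']
#   >>> cli_option({'ratelimit': 5000}, '--limit-rate', 'ratelimit', separator='=')
#   ['--limit-rate=5000']
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']
#   >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#   ['--quiet']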
4116
4117
4118 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4119 if isinstance(argdict, (list, tuple)): # for backward compatibility
4120 if use_compat:
4121 return argdict
4122 else:
4123 argdict = None
4124 if argdict is None:
4125 return default
4126 assert isinstance(argdict, dict)
4127
4128 assert isinstance(keys, (list, tuple))
4129 for key_list in keys:
4130 arg_list = list(filter(
4131 lambda x: x is not None,
4132 [argdict.get(key.lower()) for key in variadic(key_list)]))
4133 if arg_list:
4134 return [arg for args in arg_list for arg in args]
4135 return default
4136
4137
4138 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
4139 main_key, exe = main_key.lower(), exe.lower()
4140 root_key = exe if main_key == exe else f'{main_key}+{exe}'
4141 keys = [f'{root_key}{k}' for k in (keys or [''])]
4142 if root_key in keys:
4143 if main_key != exe:
4144 keys.append((main_key, exe))
4145 keys.append('default')
4146 else:
4147 use_compat = False
4148 return cli_configuration_args(argdict, keys, default, use_compat)
4149
4150
4151 class ISO639Utils:
4152 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4153 _lang_map = {
4154 'aa': 'aar',
4155 'ab': 'abk',
4156 'ae': 'ave',
4157 'af': 'afr',
4158 'ak': 'aka',
4159 'am': 'amh',
4160 'an': 'arg',
4161 'ar': 'ara',
4162 'as': 'asm',
4163 'av': 'ava',
4164 'ay': 'aym',
4165 'az': 'aze',
4166 'ba': 'bak',
4167 'be': 'bel',
4168 'bg': 'bul',
4169 'bh': 'bih',
4170 'bi': 'bis',
4171 'bm': 'bam',
4172 'bn': 'ben',
4173 'bo': 'bod',
4174 'br': 'bre',
4175 'bs': 'bos',
4176 'ca': 'cat',
4177 'ce': 'che',
4178 'ch': 'cha',
4179 'co': 'cos',
4180 'cr': 'cre',
4181 'cs': 'ces',
4182 'cu': 'chu',
4183 'cv': 'chv',
4184 'cy': 'cym',
4185 'da': 'dan',
4186 'de': 'deu',
4187 'dv': 'div',
4188 'dz': 'dzo',
4189 'ee': 'ewe',
4190 'el': 'ell',
4191 'en': 'eng',
4192 'eo': 'epo',
4193 'es': 'spa',
4194 'et': 'est',
4195 'eu': 'eus',
4196 'fa': 'fas',
4197 'ff': 'ful',
4198 'fi': 'fin',
4199 'fj': 'fij',
4200 'fo': 'fao',
4201 'fr': 'fra',
4202 'fy': 'fry',
4203 'ga': 'gle',
4204 'gd': 'gla',
4205 'gl': 'glg',
4206 'gn': 'grn',
4207 'gu': 'guj',
4208 'gv': 'glv',
4209 'ha': 'hau',
4210 'he': 'heb',
4211 'iw': 'heb', # Replaced by he in 1989 revision
4212 'hi': 'hin',
4213 'ho': 'hmo',
4214 'hr': 'hrv',
4215 'ht': 'hat',
4216 'hu': 'hun',
4217 'hy': 'hye',
4218 'hz': 'her',
4219 'ia': 'ina',
4220 'id': 'ind',
4221 'in': 'ind', # Replaced by id in 1989 revision
4222 'ie': 'ile',
4223 'ig': 'ibo',
4224 'ii': 'iii',
4225 'ik': 'ipk',
4226 'io': 'ido',
4227 'is': 'isl',
4228 'it': 'ita',
4229 'iu': 'iku',
4230 'ja': 'jpn',
4231 'jv': 'jav',
4232 'ka': 'kat',
4233 'kg': 'kon',
4234 'ki': 'kik',
4235 'kj': 'kua',
4236 'kk': 'kaz',
4237 'kl': 'kal',
4238 'km': 'khm',
4239 'kn': 'kan',
4240 'ko': 'kor',
4241 'kr': 'kau',
4242 'ks': 'kas',
4243 'ku': 'kur',
4244 'kv': 'kom',
4245 'kw': 'cor',
4246 'ky': 'kir',
4247 'la': 'lat',
4248 'lb': 'ltz',
4249 'lg': 'lug',
4250 'li': 'lim',
4251 'ln': 'lin',
4252 'lo': 'lao',
4253 'lt': 'lit',
4254 'lu': 'lub',
4255 'lv': 'lav',
4256 'mg': 'mlg',
4257 'mh': 'mah',
4258 'mi': 'mri',
4259 'mk': 'mkd',
4260 'ml': 'mal',
4261 'mn': 'mon',
4262 'mr': 'mar',
4263 'ms': 'msa',
4264 'mt': 'mlt',
4265 'my': 'mya',
4266 'na': 'nau',
4267 'nb': 'nob',
4268 'nd': 'nde',
4269 'ne': 'nep',
4270 'ng': 'ndo',
4271 'nl': 'nld',
4272 'nn': 'nno',
4273 'no': 'nor',
4274 'nr': 'nbl',
4275 'nv': 'nav',
4276 'ny': 'nya',
4277 'oc': 'oci',
4278 'oj': 'oji',
4279 'om': 'orm',
4280 'or': 'ori',
4281 'os': 'oss',
4282 'pa': 'pan',
4283 'pi': 'pli',
4284 'pl': 'pol',
4285 'ps': 'pus',
4286 'pt': 'por',
4287 'qu': 'que',
4288 'rm': 'roh',
4289 'rn': 'run',
4290 'ro': 'ron',
4291 'ru': 'rus',
4292 'rw': 'kin',
4293 'sa': 'san',
4294 'sc': 'srd',
4295 'sd': 'snd',
4296 'se': 'sme',
4297 'sg': 'sag',
4298 'si': 'sin',
4299 'sk': 'slk',
4300 'sl': 'slv',
4301 'sm': 'smo',
4302 'sn': 'sna',
4303 'so': 'som',
4304 'sq': 'sqi',
4305 'sr': 'srp',
4306 'ss': 'ssw',
4307 'st': 'sot',
4308 'su': 'sun',
4309 'sv': 'swe',
4310 'sw': 'swa',
4311 'ta': 'tam',
4312 'te': 'tel',
4313 'tg': 'tgk',
4314 'th': 'tha',
4315 'ti': 'tir',
4316 'tk': 'tuk',
4317 'tl': 'tgl',
4318 'tn': 'tsn',
4319 'to': 'ton',
4320 'tr': 'tur',
4321 'ts': 'tso',
4322 'tt': 'tat',
4323 'tw': 'twi',
4324 'ty': 'tah',
4325 'ug': 'uig',
4326 'uk': 'ukr',
4327 'ur': 'urd',
4328 'uz': 'uzb',
4329 've': 'ven',
4330 'vi': 'vie',
4331 'vo': 'vol',
4332 'wa': 'wln',
4333 'wo': 'wol',
4334 'xh': 'xho',
4335 'yi': 'yid',
4336 'ji': 'yid', # Replaced by yi in 1989 revision
4337 'yo': 'yor',
4338 'za': 'zha',
4339 'zh': 'zho',
4340 'zu': 'zul',
4341 }
4342
4343 @classmethod
4344 def short2long(cls, code):
4345 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4346 return cls._lang_map.get(code[:2])
4347
4348 @classmethod
4349 def long2short(cls, code):
4350 """Convert language code from ISO 639-2/T to ISO 639-1"""
4351 for short_name, long_name in cls._lang_map.items():
4352 if long_name == code:
4353 return short_name
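# Illustrative examples (not part of the original source):
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'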
4354
4355
4356 class ISO3166Utils:
4357 # From http://data.okfn.org/data/core/country-list
4358 _country_map = {
4359 'AF': 'Afghanistan',
4360 'AX': 'Åland Islands',
4361 'AL': 'Albania',
4362 'DZ': 'Algeria',
4363 'AS': 'American Samoa',
4364 'AD': 'Andorra',
4365 'AO': 'Angola',
4366 'AI': 'Anguilla',
4367 'AQ': 'Antarctica',
4368 'AG': 'Antigua and Barbuda',
4369 'AR': 'Argentina',
4370 'AM': 'Armenia',
4371 'AW': 'Aruba',
4372 'AU': 'Australia',
4373 'AT': 'Austria',
4374 'AZ': 'Azerbaijan',
4375 'BS': 'Bahamas',
4376 'BH': 'Bahrain',
4377 'BD': 'Bangladesh',
4378 'BB': 'Barbados',
4379 'BY': 'Belarus',
4380 'BE': 'Belgium',
4381 'BZ': 'Belize',
4382 'BJ': 'Benin',
4383 'BM': 'Bermuda',
4384 'BT': 'Bhutan',
4385 'BO': 'Bolivia, Plurinational State of',
4386 'BQ': 'Bonaire, Sint Eustatius and Saba',
4387 'BA': 'Bosnia and Herzegovina',
4388 'BW': 'Botswana',
4389 'BV': 'Bouvet Island',
4390 'BR': 'Brazil',
4391 'IO': 'British Indian Ocean Territory',
4392 'BN': 'Brunei Darussalam',
4393 'BG': 'Bulgaria',
4394 'BF': 'Burkina Faso',
4395 'BI': 'Burundi',
4396 'KH': 'Cambodia',
4397 'CM': 'Cameroon',
4398 'CA': 'Canada',
4399 'CV': 'Cape Verde',
4400 'KY': 'Cayman Islands',
4401 'CF': 'Central African Republic',
4402 'TD': 'Chad',
4403 'CL': 'Chile',
4404 'CN': 'China',
4405 'CX': 'Christmas Island',
4406 'CC': 'Cocos (Keeling) Islands',
4407 'CO': 'Colombia',
4408 'KM': 'Comoros',
4409 'CG': 'Congo',
4410 'CD': 'Congo, the Democratic Republic of the',
4411 'CK': 'Cook Islands',
4412 'CR': 'Costa Rica',
4413 'CI': 'Côte d\'Ivoire',
4414 'HR': 'Croatia',
4415 'CU': 'Cuba',
4416 'CW': 'Curaçao',
4417 'CY': 'Cyprus',
4418 'CZ': 'Czech Republic',
4419 'DK': 'Denmark',
4420 'DJ': 'Djibouti',
4421 'DM': 'Dominica',
4422 'DO': 'Dominican Republic',
4423 'EC': 'Ecuador',
4424 'EG': 'Egypt',
4425 'SV': 'El Salvador',
4426 'GQ': 'Equatorial Guinea',
4427 'ER': 'Eritrea',
4428 'EE': 'Estonia',
4429 'ET': 'Ethiopia',
4430 'FK': 'Falkland Islands (Malvinas)',
4431 'FO': 'Faroe Islands',
4432 'FJ': 'Fiji',
4433 'FI': 'Finland',
4434 'FR': 'France',
4435 'GF': 'French Guiana',
4436 'PF': 'French Polynesia',
4437 'TF': 'French Southern Territories',
4438 'GA': 'Gabon',
4439 'GM': 'Gambia',
4440 'GE': 'Georgia',
4441 'DE': 'Germany',
4442 'GH': 'Ghana',
4443 'GI': 'Gibraltar',
4444 'GR': 'Greece',
4445 'GL': 'Greenland',
4446 'GD': 'Grenada',
4447 'GP': 'Guadeloupe',
4448 'GU': 'Guam',
4449 'GT': 'Guatemala',
4450 'GG': 'Guernsey',
4451 'GN': 'Guinea',
4452 'GW': 'Guinea-Bissau',
4453 'GY': 'Guyana',
4454 'HT': 'Haiti',
4455 'HM': 'Heard Island and McDonald Islands',
4456 'VA': 'Holy See (Vatican City State)',
4457 'HN': 'Honduras',
4458 'HK': 'Hong Kong',
4459 'HU': 'Hungary',
4460 'IS': 'Iceland',
4461 'IN': 'India',
4462 'ID': 'Indonesia',
4463 'IR': 'Iran, Islamic Republic of',
4464 'IQ': 'Iraq',
4465 'IE': 'Ireland',
4466 'IM': 'Isle of Man',
4467 'IL': 'Israel',
4468 'IT': 'Italy',
4469 'JM': 'Jamaica',
4470 'JP': 'Japan',
4471 'JE': 'Jersey',
4472 'JO': 'Jordan',
4473 'KZ': 'Kazakhstan',
4474 'KE': 'Kenya',
4475 'KI': 'Kiribati',
4476 'KP': 'Korea, Democratic People\'s Republic of',
4477 'KR': 'Korea, Republic of',
4478 'KW': 'Kuwait',
4479 'KG': 'Kyrgyzstan',
4480 'LA': 'Lao People\'s Democratic Republic',
4481 'LV': 'Latvia',
4482 'LB': 'Lebanon',
4483 'LS': 'Lesotho',
4484 'LR': 'Liberia',
4485 'LY': 'Libya',
4486 'LI': 'Liechtenstein',
4487 'LT': 'Lithuania',
4488 'LU': 'Luxembourg',
4489 'MO': 'Macao',
4490 'MK': 'Macedonia, the Former Yugoslav Republic of',
4491 'MG': 'Madagascar',
4492 'MW': 'Malawi',
4493 'MY': 'Malaysia',
4494 'MV': 'Maldives',
4495 'ML': 'Mali',
4496 'MT': 'Malta',
4497 'MH': 'Marshall Islands',
4498 'MQ': 'Martinique',
4499 'MR': 'Mauritania',
4500 'MU': 'Mauritius',
4501 'YT': 'Mayotte',
4502 'MX': 'Mexico',
4503 'FM': 'Micronesia, Federated States of',
4504 'MD': 'Moldova, Republic of',
4505 'MC': 'Monaco',
4506 'MN': 'Mongolia',
4507 'ME': 'Montenegro',
4508 'MS': 'Montserrat',
4509 'MA': 'Morocco',
4510 'MZ': 'Mozambique',
4511 'MM': 'Myanmar',
4512 'NA': 'Namibia',
4513 'NR': 'Nauru',
4514 'NP': 'Nepal',
4515 'NL': 'Netherlands',
4516 'NC': 'New Caledonia',
4517 'NZ': 'New Zealand',
4518 'NI': 'Nicaragua',
4519 'NE': 'Niger',
4520 'NG': 'Nigeria',
4521 'NU': 'Niue',
4522 'NF': 'Norfolk Island',
4523 'MP': 'Northern Mariana Islands',
4524 'NO': 'Norway',
4525 'OM': 'Oman',
4526 'PK': 'Pakistan',
4527 'PW': 'Palau',
4528 'PS': 'Palestine, State of',
4529 'PA': 'Panama',
4530 'PG': 'Papua New Guinea',
4531 'PY': 'Paraguay',
4532 'PE': 'Peru',
4533 'PH': 'Philippines',
4534 'PN': 'Pitcairn',
4535 'PL': 'Poland',
4536 'PT': 'Portugal',
4537 'PR': 'Puerto Rico',
4538 'QA': 'Qatar',
4539 'RE': 'Réunion',
4540 'RO': 'Romania',
4541 'RU': 'Russian Federation',
4542 'RW': 'Rwanda',
4543 'BL': 'Saint Barthélemy',
4544 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4545 'KN': 'Saint Kitts and Nevis',
4546 'LC': 'Saint Lucia',
4547 'MF': 'Saint Martin (French part)',
4548 'PM': 'Saint Pierre and Miquelon',
4549 'VC': 'Saint Vincent and the Grenadines',
4550 'WS': 'Samoa',
4551 'SM': 'San Marino',
4552 'ST': 'Sao Tome and Principe',
4553 'SA': 'Saudi Arabia',
4554 'SN': 'Senegal',
4555 'RS': 'Serbia',
4556 'SC': 'Seychelles',
4557 'SL': 'Sierra Leone',
4558 'SG': 'Singapore',
4559 'SX': 'Sint Maarten (Dutch part)',
4560 'SK': 'Slovakia',
4561 'SI': 'Slovenia',
4562 'SB': 'Solomon Islands',
4563 'SO': 'Somalia',
4564 'ZA': 'South Africa',
4565 'GS': 'South Georgia and the South Sandwich Islands',
4566 'SS': 'South Sudan',
4567 'ES': 'Spain',
4568 'LK': 'Sri Lanka',
4569 'SD': 'Sudan',
4570 'SR': 'Suriname',
4571 'SJ': 'Svalbard and Jan Mayen',
4572 'SZ': 'Swaziland',
4573 'SE': 'Sweden',
4574 'CH': 'Switzerland',
4575 'SY': 'Syrian Arab Republic',
4576 'TW': 'Taiwan, Province of China',
4577 'TJ': 'Tajikistan',
4578 'TZ': 'Tanzania, United Republic of',
4579 'TH': 'Thailand',
4580 'TL': 'Timor-Leste',
4581 'TG': 'Togo',
4582 'TK': 'Tokelau',
4583 'TO': 'Tonga',
4584 'TT': 'Trinidad and Tobago',
4585 'TN': 'Tunisia',
4586 'TR': 'Turkey',
4587 'TM': 'Turkmenistan',
4588 'TC': 'Turks and Caicos Islands',
4589 'TV': 'Tuvalu',
4590 'UG': 'Uganda',
4591 'UA': 'Ukraine',
4592 'AE': 'United Arab Emirates',
4593 'GB': 'United Kingdom',
4594 'US': 'United States',
4595 'UM': 'United States Minor Outlying Islands',
4596 'UY': 'Uruguay',
4597 'UZ': 'Uzbekistan',
4598 'VU': 'Vanuatu',
4599 'VE': 'Venezuela, Bolivarian Republic of',
4600 'VN': 'Viet Nam',
4601 'VG': 'Virgin Islands, British',
4602 'VI': 'Virgin Islands, U.S.',
4603 'WF': 'Wallis and Futuna',
4604 'EH': 'Western Sahara',
4605 'YE': 'Yemen',
4606 'ZM': 'Zambia',
4607 'ZW': 'Zimbabwe',
4608 # Not ISO 3166 codes, but used for IP blocks
4609 'AP': 'Asia/Pacific Region',
4610 'EU': 'Europe',
4611 }
4612
4613 @classmethod
4614 def short2full(cls, code):
4615 """Convert an ISO 3166-2 country code to the corresponding full name"""
4616 return cls._country_map.get(code.upper())
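# Illustrative example (not part of the original source):
#   >>> ISO3166Utils.short2full('JP')
#   'Japan'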
4617
4618
4619 class GeoUtils:
4620 # Major IPv4 address blocks per country
4621 _country_ip_map = {
4622 'AD': '46.172.224.0/19',
4623 'AE': '94.200.0.0/13',
4624 'AF': '149.54.0.0/17',
4625 'AG': '209.59.64.0/18',
4626 'AI': '204.14.248.0/21',
4627 'AL': '46.99.0.0/16',
4628 'AM': '46.70.0.0/15',
4629 'AO': '105.168.0.0/13',
4630 'AP': '182.50.184.0/21',
4631 'AQ': '23.154.160.0/24',
4632 'AR': '181.0.0.0/12',
4633 'AS': '202.70.112.0/20',
4634 'AT': '77.116.0.0/14',
4635 'AU': '1.128.0.0/11',
4636 'AW': '181.41.0.0/18',
4637 'AX': '185.217.4.0/22',
4638 'AZ': '5.197.0.0/16',
4639 'BA': '31.176.128.0/17',
4640 'BB': '65.48.128.0/17',
4641 'BD': '114.130.0.0/16',
4642 'BE': '57.0.0.0/8',
4643 'BF': '102.178.0.0/15',
4644 'BG': '95.42.0.0/15',
4645 'BH': '37.131.0.0/17',
4646 'BI': '154.117.192.0/18',
4647 'BJ': '137.255.0.0/16',
4648 'BL': '185.212.72.0/23',
4649 'BM': '196.12.64.0/18',
4650 'BN': '156.31.0.0/16',
4651 'BO': '161.56.0.0/16',
4652 'BQ': '161.0.80.0/20',
4653 'BR': '191.128.0.0/12',
4654 'BS': '24.51.64.0/18',
4655 'BT': '119.2.96.0/19',
4656 'BW': '168.167.0.0/16',
4657 'BY': '178.120.0.0/13',
4658 'BZ': '179.42.192.0/18',
4659 'CA': '99.224.0.0/11',
4660 'CD': '41.243.0.0/16',
4661 'CF': '197.242.176.0/21',
4662 'CG': '160.113.0.0/16',
4663 'CH': '85.0.0.0/13',
4664 'CI': '102.136.0.0/14',
4665 'CK': '202.65.32.0/19',
4666 'CL': '152.172.0.0/14',
4667 'CM': '102.244.0.0/14',
4668 'CN': '36.128.0.0/10',
4669 'CO': '181.240.0.0/12',
4670 'CR': '201.192.0.0/12',
4671 'CU': '152.206.0.0/15',
4672 'CV': '165.90.96.0/19',
4673 'CW': '190.88.128.0/17',
4674 'CY': '31.153.0.0/16',
4675 'CZ': '88.100.0.0/14',
4676 'DE': '53.0.0.0/8',
4677 'DJ': '197.241.0.0/17',
4678 'DK': '87.48.0.0/12',
4679 'DM': '192.243.48.0/20',
4680 'DO': '152.166.0.0/15',
4681 'DZ': '41.96.0.0/12',
4682 'EC': '186.68.0.0/15',
4683 'EE': '90.190.0.0/15',
4684 'EG': '156.160.0.0/11',
4685 'ER': '196.200.96.0/20',
4686 'ES': '88.0.0.0/11',
4687 'ET': '196.188.0.0/14',
4688 'EU': '2.16.0.0/13',
4689 'FI': '91.152.0.0/13',
4690 'FJ': '144.120.0.0/16',
4691 'FK': '80.73.208.0/21',
4692 'FM': '119.252.112.0/20',
4693 'FO': '88.85.32.0/19',
4694 'FR': '90.0.0.0/9',
4695 'GA': '41.158.0.0/15',
4696 'GB': '25.0.0.0/8',
4697 'GD': '74.122.88.0/21',
4698 'GE': '31.146.0.0/16',
4699 'GF': '161.22.64.0/18',
4700 'GG': '62.68.160.0/19',
4701 'GH': '154.160.0.0/12',
4702 'GI': '95.164.0.0/16',
4703 'GL': '88.83.0.0/19',
4704 'GM': '160.182.0.0/15',
4705 'GN': '197.149.192.0/18',
4706 'GP': '104.250.0.0/19',
4707 'GQ': '105.235.224.0/20',
4708 'GR': '94.64.0.0/13',
4709 'GT': '168.234.0.0/16',
4710 'GU': '168.123.0.0/16',
4711 'GW': '197.214.80.0/20',
4712 'GY': '181.41.64.0/18',
4713 'HK': '113.252.0.0/14',
4714 'HN': '181.210.0.0/16',
4715 'HR': '93.136.0.0/13',
4716 'HT': '148.102.128.0/17',
4717 'HU': '84.0.0.0/14',
4718 'ID': '39.192.0.0/10',
4719 'IE': '87.32.0.0/12',
4720 'IL': '79.176.0.0/13',
4721 'IM': '5.62.80.0/20',
4722 'IN': '117.192.0.0/10',
4723 'IO': '203.83.48.0/21',
4724 'IQ': '37.236.0.0/14',
4725 'IR': '2.176.0.0/12',
4726 'IS': '82.221.0.0/16',
4727 'IT': '79.0.0.0/10',
4728 'JE': '87.244.64.0/18',
4729 'JM': '72.27.0.0/17',
4730 'JO': '176.29.0.0/16',
4731 'JP': '133.0.0.0/8',
4732 'KE': '105.48.0.0/12',
4733 'KG': '158.181.128.0/17',
4734 'KH': '36.37.128.0/17',
4735 'KI': '103.25.140.0/22',
4736 'KM': '197.255.224.0/20',
4737 'KN': '198.167.192.0/19',
4738 'KP': '175.45.176.0/22',
4739 'KR': '175.192.0.0/10',
4740 'KW': '37.36.0.0/14',
4741 'KY': '64.96.0.0/15',
4742 'KZ': '2.72.0.0/13',
4743 'LA': '115.84.64.0/18',
4744 'LB': '178.135.0.0/16',
4745 'LC': '24.92.144.0/20',
4746 'LI': '82.117.0.0/19',
4747 'LK': '112.134.0.0/15',
4748 'LR': '102.183.0.0/16',
4749 'LS': '129.232.0.0/17',
4750 'LT': '78.56.0.0/13',
4751 'LU': '188.42.0.0/16',
4752 'LV': '46.109.0.0/16',
4753 'LY': '41.252.0.0/14',
4754 'MA': '105.128.0.0/11',
4755 'MC': '88.209.64.0/18',
4756 'MD': '37.246.0.0/16',
4757 'ME': '178.175.0.0/17',
4758 'MF': '74.112.232.0/21',
4759 'MG': '154.126.0.0/17',
4760 'MH': '117.103.88.0/21',
4761 'MK': '77.28.0.0/15',
4762 'ML': '154.118.128.0/18',
4763 'MM': '37.111.0.0/17',
4764 'MN': '49.0.128.0/17',
4765 'MO': '60.246.0.0/16',
4766 'MP': '202.88.64.0/20',
4767 'MQ': '109.203.224.0/19',
4768 'MR': '41.188.64.0/18',
4769 'MS': '208.90.112.0/22',
4770 'MT': '46.11.0.0/16',
4771 'MU': '105.16.0.0/12',
4772 'MV': '27.114.128.0/18',
4773 'MW': '102.70.0.0/15',
4774 'MX': '187.192.0.0/11',
4775 'MY': '175.136.0.0/13',
4776 'MZ': '197.218.0.0/15',
4777 'NA': '41.182.0.0/16',
4778 'NC': '101.101.0.0/18',
4779 'NE': '197.214.0.0/18',
4780 'NF': '203.17.240.0/22',
4781 'NG': '105.112.0.0/12',
4782 'NI': '186.76.0.0/15',
4783 'NL': '145.96.0.0/11',
4784 'NO': '84.208.0.0/13',
4785 'NP': '36.252.0.0/15',
4786 'NR': '203.98.224.0/19',
4787 'NU': '49.156.48.0/22',
4788 'NZ': '49.224.0.0/14',
4789 'OM': '5.36.0.0/15',
4790 'PA': '186.72.0.0/15',
4791 'PE': '186.160.0.0/14',
4792 'PF': '123.50.64.0/18',
4793 'PG': '124.240.192.0/19',
4794 'PH': '49.144.0.0/13',
4795 'PK': '39.32.0.0/11',
4796 'PL': '83.0.0.0/11',
4797 'PM': '70.36.0.0/20',
4798 'PR': '66.50.0.0/16',
4799 'PS': '188.161.0.0/16',
4800 'PT': '85.240.0.0/13',
4801 'PW': '202.124.224.0/20',
4802 'PY': '181.120.0.0/14',
4803 'QA': '37.210.0.0/15',
4804 'RE': '102.35.0.0/16',
4805 'RO': '79.112.0.0/13',
4806 'RS': '93.86.0.0/15',
4807 'RU': '5.136.0.0/13',
4808 'RW': '41.186.0.0/16',
4809 'SA': '188.48.0.0/13',
4810 'SB': '202.1.160.0/19',
4811 'SC': '154.192.0.0/11',
4812 'SD': '102.120.0.0/13',
4813 'SE': '78.64.0.0/12',
4814 'SG': '8.128.0.0/10',
4815 'SI': '188.196.0.0/14',
4816 'SK': '78.98.0.0/15',
4817 'SL': '102.143.0.0/17',
4818 'SM': '89.186.32.0/19',
4819 'SN': '41.82.0.0/15',
4820 'SO': '154.115.192.0/18',
4821 'SR': '186.179.128.0/17',
4822 'SS': '105.235.208.0/21',
4823 'ST': '197.159.160.0/19',
4824 'SV': '168.243.0.0/16',
4825 'SX': '190.102.0.0/20',
4826 'SY': '5.0.0.0/16',
4827 'SZ': '41.84.224.0/19',
4828 'TC': '65.255.48.0/20',
4829 'TD': '154.68.128.0/19',
4830 'TG': '196.168.0.0/14',
4831 'TH': '171.96.0.0/13',
4832 'TJ': '85.9.128.0/18',
4833 'TK': '27.96.24.0/21',
4834 'TL': '180.189.160.0/20',
4835 'TM': '95.85.96.0/19',
4836 'TN': '197.0.0.0/11',
4837 'TO': '175.176.144.0/21',
4838 'TR': '78.160.0.0/11',
4839 'TT': '186.44.0.0/15',
4840 'TV': '202.2.96.0/19',
4841 'TW': '120.96.0.0/11',
4842 'TZ': '156.156.0.0/14',
4843 'UA': '37.52.0.0/14',
4844 'UG': '102.80.0.0/13',
4845 'US': '6.0.0.0/8',
4846 'UY': '167.56.0.0/13',
4847 'UZ': '84.54.64.0/18',
4848 'VA': '212.77.0.0/19',
4849 'VC': '207.191.240.0/21',
4850 'VE': '186.88.0.0/13',
4851 'VG': '66.81.192.0/20',
4852 'VI': '146.226.0.0/16',
4853 'VN': '14.160.0.0/11',
4854 'VU': '202.80.32.0/20',
4855 'WF': '117.20.32.0/21',
4856 'WS': '202.4.32.0/19',
4857 'YE': '134.35.0.0/16',
4858 'YT': '41.242.116.0/22',
4859 'ZA': '41.0.0.0/11',
4860 'ZM': '102.144.0.0/13',
4861 'ZW': '102.177.192.0/18',
4862 }
4863
4864 @classmethod
4865 def random_ipv4(cls, code_or_block):
4866 if len(code_or_block) == 2:
4867 block = cls._country_ip_map.get(code_or_block.upper())
4868 if not block:
4869 return None
4870 else:
4871 block = code_or_block
4872 addr, preflen = block.split('/')
4873 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4874 addr_max = addr_min | (0xffffffff >> int(preflen))
4875 return str(socket.inet_ntoa(
4876 struct.pack('!L', random.randint(addr_min, addr_max))))
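# Illustrative example (not part of the original source); 133.0.0.0/8 is the
# 'JP' block from the table above, so the generated address always falls
# inside it:
#   >>> import ipaddress
#   >>> ipaddress.ip_address(GeoUtils.random_ipv4('JP')) in ipaddress.ip_network('133.0.0.0/8')
#   True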
4877
4878
4879 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4880 def __init__(self, proxies=None):
4881 # Set default handlers
4882 for type in ('http', 'https'):
4883 setattr(self, '%s_open' % type,
4884 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4885 meth(r, proxy, type))
4886 urllib.request.ProxyHandler.__init__(self, proxies)
4887
4888 def proxy_open(self, req, proxy, type):
4889 req_proxy = req.headers.get('Ytdl-request-proxy')
4890 if req_proxy is not None:
4891 proxy = req_proxy
4892 del req.headers['Ytdl-request-proxy']
4893
4894 if proxy == '__noproxy__':
4895 return None # No Proxy
4896 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4897 req.add_header('Ytdl-socks-proxy', proxy)
4898 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
4899 return None
4900 return urllib.request.ProxyHandler.proxy_open(
4901 self, req, proxy, type)
4902
4903
4904 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4905 # released into Public Domain
4906 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4907
4908 def long_to_bytes(n, blocksize=0):
4909 """long_to_bytes(n:long, blocksize:int) : string
4910 Convert a long integer to a byte string.
4911
4912 If optional blocksize is given and greater than zero, pad the front of the
4913 byte string with binary zeros so that the length is a multiple of
4914 blocksize.
4915 """
4916 # after much testing, this algorithm was deemed to be the fastest
4917 s = b''
4918 n = int(n)
4919 while n > 0:
4920 s = struct.pack('>I', n & 0xffffffff) + s
4921 n = n >> 32
4922 # strip off leading zeros
4923 for i in range(len(s)):
4924 if s[i] != b'\000'[0]:
4925 break
4926 else:
4927 # only happens when n == 0
4928 s = b'\000'
4929 i = 0
4930 s = s[i:]
4931 # add back some pad bytes. this could be done more efficiently w.r.t. the
4932 # de-padding being done above, but sigh...
4933 if blocksize > 0 and len(s) % blocksize:
4934 s = (blocksize - len(s) % blocksize) * b'\000' + s
4935 return s
4936
4937
4938 def bytes_to_long(s):
4939 """bytes_to_long(string) : long
4940 Convert a byte string to a long integer.
4941
4942 This is (essentially) the inverse of long_to_bytes().
4943 """
4944 acc = 0
4945 length = len(s)
4946 if length % 4:
4947 extra = (4 - length % 4)
4948 s = b'\000' * extra + s
4949 length = length + extra
4950 for i in range(0, length, 4):
4951 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4952 return acc
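# Illustrative examples (not part of the original source):
#   >>> bytes_to_long(b'\x01\x00')
#   256
#   >>> long_to_bytes(256, blocksize=4)
#   b'\x00\x00\x01\x00'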
4953
4954
4955 def ohdave_rsa_encrypt(data, exponent, modulus):
4956 '''
4957 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4958
4959 Input:
4960 data: data to encrypt, bytes-like object
4961 exponent, modulus: parameter e and N of RSA algorithm, both integer
4962 Output: hex string of encrypted data
4963
4964 Limitation: supports one block encryption only
4965 '''
4966
4967 payload = int(binascii.hexlify(data[::-1]), 16)
4968 encrypted = pow(payload, exponent, modulus)
4969 return '%x' % encrypted
4970
4971
4972 def pkcs1pad(data, length):
4973 """
4974 Padding input data with PKCS#1 scheme
4975
4976 @param {int[]} data input data
4977 @param {int} length target length
4978 @returns {int[]} padded data
4979 """
4980 if len(data) > length - 11:
4981 raise ValueError('Input data too long for PKCS#1 padding')
4982
4983 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 padding bytes must be non-zero
4984 return [0, 2] + pseudo_random + [0] + data
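# Illustrative example (not part of the original source): the block starts
# with [0, 2], and a zero byte separates the random padding from the data:
#   >>> padded = pkcs1pad([0x41, 0x42], 16)
#   >>> len(padded), padded[:2], padded[-3:]
#   (16, [0, 2], [0, 65, 66])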
4985
4986
4987 def _base_n_table(n, table):
4988 if not table and not n:
4989 raise ValueError('Either table or n must be specified')
4990 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4991
4992 if n and n != len(table):
4993 raise ValueError(f'base {n} exceeds table length {len(table)}')
4994 return table
4995
4996
4997 def encode_base_n(num, n=None, table=None):
4998 """Convert given int to a base-n string"""
4999 table = _base_n_table(n, table)
5000 if not num:
5001 return table[0]
5002
5003 result, base = '', len(table)
5004 while num:
5005 result = table[num % base] + result
5006 num = num // base
5007 return result
5008
5009
5010 def decode_base_n(string, n=None, table=None):
5011 """Convert given base-n string to int"""
5012 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
5013 result, base = 0, len(table)
5014 for char in string:
5015 result = result * base + table[char]
5016 return result
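# Illustrative examples (not part of the original source):
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> decode_base_n('ff', 16)
#   255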
5017
5018
5019 def decode_packed_codes(code):
5020 mobj = re.search(PACKED_CODES_RE, code)
5021 obfuscated_code, base, count, symbols = mobj.groups()
5022 base = int(base)
5023 count = int(count)
5024 symbols = symbols.split('|')
5025 symbol_table = {}
5026
5027 while count:
5028 count -= 1
5029 base_n_count = encode_base_n(count, base)
5030 symbol_table[base_n_count] = symbols[count] or base_n_count
5031
5032 return re.sub(
5033 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5034 obfuscated_code)
5035
5036
5037 def caesar(s, alphabet, shift):
5038 if shift == 0:
5039 return s
5040 l = len(alphabet)
5041 return ''.join(
5042 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5043 for c in s)
5044
5045
5046 def rot47(s):
5047 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
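# Illustrative examples (not part of the original source):
#   >>> caesar('hal', 'abcdefghijklmnopqrstuvwxyz', 1)
#   'ibm'
#   >>> rot47(rot47('yt-dlp'))  # rot47 is its own inverse (47 + 47 = 94 = alphabet size)
#   'yt-dlp'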
5048
5049
5050 def parse_m3u8_attributes(attrib):
5051 info = {}
5052 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5053 if val.startswith('"'):
5054 val = val[1:-1]
5055 info[key] = val
5056 return info
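# Illustrative example (not part of the original source); note that quoted
# values may contain commas:
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}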
5057
5058
5059 def urshift(val, n):
5060 return val >> n if val >= 0 else (val + 0x100000000) >> n
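# Illustrative example (not part of the original source): behaves like
# JavaScript's unsigned right shift (>>>) for 32-bit values:
#   >>> urshift(-1, 28)
#   15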
5061
5062
5063 def write_xattr(path, key, value):
5064 # Windows: Write xattrs to NTFS Alternate Data Streams:
5065 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5066 if compat_os_name == 'nt':
5067 assert ':' not in key
5068 assert os.path.exists(path)
5069
5070 try:
5071 with open(f'{path}:{key}', 'wb') as f:
5072 f.write(value)
5073 except OSError as e:
5074 raise XAttrMetadataError(e.errno, e.strerror)
5075 return
5076
5077 # UNIX Method 1. Use xattrs/pyxattrs modules
5078
5079 setxattr = None
5080 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5081 # Unicode arguments are not supported in pyxattr until version 0.5.0
5082 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5083 if version_tuple(xattr.__version__) >= (0, 5, 0):
5084 setxattr = xattr.set
5085 elif xattr:
5086 setxattr = xattr.setxattr
5087
5088 if setxattr:
5089 try:
5090 setxattr(path, key, value)
5091 except OSError as e:
5092 raise XAttrMetadataError(e.errno, e.strerror)
5093 return
5094
5095 # UNIX Method 2. Use setfattr/xattr executables
5096 exe = ('setfattr' if check_executable('setfattr', ['--version'])
5097 else 'xattr' if check_executable('xattr', ['-h']) else None)
5098 if not exe:
5099 raise XAttrUnavailableError(
5100 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5101 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5102
5103 value = value.decode()
5104 try:
5105 _, stderr, returncode = Popen.run(
5106 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5107 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5108 except OSError as e:
5109 raise XAttrMetadataError(e.errno, e.strerror)
5110 if returncode:
5111 raise XAttrMetadataError(returncode, stderr)
5112
5113
5114 def random_birthday(year_field, month_field, day_field):
5115 start_date = datetime.date(1950, 1, 1)
5116 end_date = datetime.date(1995, 12, 31)
5117 offset = random.randint(0, (end_date - start_date).days)
5118 random_date = start_date + datetime.timedelta(offset)
5119 return {
5120 year_field: str(random_date.year),
5121 month_field: str(random_date.month),
5122 day_field: str(random_date.day),
5123 }
5124
5125
5126 def find_available_port(interface=''):
5127 try:
5128 with socket.socket() as sock:
5129 sock.bind((interface, 0))
5130 return sock.getsockname()[1]
5131 except OSError:
5132 return None
5133
5134
5135 # Templates for internet shortcut files, which are plain text files.
5136 DOT_URL_LINK_TEMPLATE = '''\
5137 [InternetShortcut]
5138 URL=%(url)s
5139 '''
5140
5141 DOT_WEBLOC_LINK_TEMPLATE = '''\
5142 <?xml version="1.0" encoding="UTF-8"?>
5143 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5144 <plist version="1.0">
5145 <dict>
5146 \t<key>URL</key>
5147 \t<string>%(url)s</string>
5148 </dict>
5149 </plist>
5150 '''
5151
5152 DOT_DESKTOP_LINK_TEMPLATE = '''\
5153 [Desktop Entry]
5154 Encoding=UTF-8
5155 Name=%(filename)s
5156 Type=Link
5157 URL=%(url)s
5158 Icon=text-html
5159 '''
5160
5161 LINK_TEMPLATES = {
5162 'url': DOT_URL_LINK_TEMPLATE,
5163 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5164 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5165 }
5166
5167
5168 def iri_to_uri(iri):
5169 """
5170 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5171
5172 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes (using an underlying UTF-8 encoding) only those characters that are not already escaped, leaving the rest of the URI intact.
5173 """
5174
5175 iri_parts = urllib.parse.urlparse(iri)
5176
5177 if '[' in iri_parts.netloc:
5178 raise ValueError('IPv6 URIs are not yet supported.')
5179 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5180
5181 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5182
5183 net_location = ''
5184 if iri_parts.username:
5185 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5186 if iri_parts.password is not None:
5187 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5188 net_location += '@'
5189
5190 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5191 # The 'idna' encoding produces ASCII text.
5192 if iri_parts.port is not None and iri_parts.port != 80:
5193 net_location += ':' + str(iri_parts.port)
5194
5195 return urllib.parse.urlunparse(
5196 (iri_parts.scheme,
5197 net_location,
5198
5199 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5200
5201 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5202 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5203
5204 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5205 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5206
5207 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5208
5209 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
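# Illustrative example (not part of the original source):
#   >>> iri_to_uri('https://example.com/fü?q=ä')
#   'https://example.com/f%C3%BC?q=%C3%A4'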
5210
5211
5212 def to_high_limit_path(path):
5213 if sys.platform in ['win32', 'cygwin']:
5214 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5215 return '\\\\?\\' + os.path.abspath(path)
5216
5217 return path
5218
5219
5220 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5221 val = traversal.traverse_obj(obj, *variadic(field))
5222 if not val if ignore is NO_DEFAULT else val in variadic(ignore):
5223 return default
5224 return template % func(val)
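# Illustrative examples (not part of the original source):
#   >>> format_field({'width': 1280}, 'width', '%dpx', default='unknown')
#   '1280px'
#   >>> format_field({}, 'width', '%dpx', default='unknown')
#   'unknown'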
5225
5226
5227 def clean_podcast_url(url):
5228 return re.sub(r'''(?x)
5229 (?:
5230 (?:
5231 chtbl\.com/track|
5232 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5233 play\.podtrac\.com
5234 )/[^/]+|
5235 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5236 flex\.acast\.com|
5237 pd(?:
5238 cn\.co| # https://podcorn.com/analytics-prefix/
5239 st\.fm # https://podsights.com/docs/
5240 )/e
5241 )/''', '', url)
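# Illustrative example (not part of the original source; the target URL is
# made up):
#   >>> clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/traffic.megaphone.fm/ep.mp3')
#   'https://traffic.megaphone.fm/ep.mp3'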
5242
5243
5244 _HEX_TABLE = '0123456789abcdef'
5245
5246
5247 def random_uuidv4():
5248 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5249
5250
5251 def make_dir(path, to_screen=None):
5252 try:
5253 dn = os.path.dirname(path)
5254 if dn:
5255 os.makedirs(dn, exist_ok=True)
5256 return True
5257 except OSError as err:
5258 if callable(to_screen):
5259 to_screen(f'unable to create directory: {err}')
5260 return False
5261
5262
5263 def get_executable_path():
5264 from ..update import _get_variant_and_executable_path
5265
5266 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5267
5268
5269 def get_user_config_dirs(package_name):
5270 # .config (e.g. ~/.config/package_name)
5271 xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
5272 yield os.path.join(xdg_config_home, package_name)
5273
5274 # appdata (%APPDATA%/package_name)
5275 appdata_dir = os.getenv('appdata')
5276 if appdata_dir:
5277 yield os.path.join(appdata_dir, package_name)
5278
5279 # home (~/.package_name)
5280 yield os.path.join(compat_expanduser('~'), f'.{package_name}')
5281
5282
5283 def get_system_config_dirs(package_name):
5284 # /etc/package_name
5285 yield os.path.join('/etc', package_name)
5286
5287
5288 def time_seconds(**kwargs):
5289 """
5290 Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
5291 """
5292 return time.time() + datetime.timedelta(**kwargs).total_seconds()
5293
5294
5295 # Create a JSON Web Signature (JWS) with the HS256 algorithm.
5296 # The resulting format is JWS Compact Serialization.
5297 # Implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5298 # and JWS https://www.rfc-editor.org/rfc/rfc7515.html
5299 def jwt_encode_hs256(payload_data, key, headers={}):
5300 header_data = {
5301 'alg': 'HS256',
5302 'typ': 'JWT',
5303 }
5304 if headers:
5305 header_data.update(headers)
5306 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5307 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5308 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5309 signature_b64 = base64.b64encode(h.digest())
5310 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5311 return token
5312
5313
5314 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5315 def jwt_decode_hs256(jwt):
5316 header_b64, payload_b64, signature_b64 = jwt.split('.')
5317 # add back trailing '='s that may have been stripped; superfluous '='s are ignored by the decoder
5318 payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
5319 return payload_data
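# Illustrative round trip (not part of the original source); jwt_encode_hs256
# returns bytes, while jwt_decode_hs256 expects str:
#   >>> token = jwt_encode_hs256({'uid': 123}, 'secret')
#   >>> jwt_decode_hs256(token.decode())
#   {'uid': 123}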
5320
5321
5322 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5323
5324
5325 @functools.cache
5326 def supports_terminal_sequences(stream):
5327 if compat_os_name == 'nt':
5328 if not WINDOWS_VT_MODE:
5329 return False
5330 elif not os.getenv('TERM'):
5331 return False
5332 try:
5333 return stream.isatty()
5334 except BaseException:
5335 return False
5336
5337
5338 def windows_enable_vt_mode():
5339 """Ref: https://bugs.python.org/issue30075 """
5340 if get_windows_version() < (10, 0, 10586):
5341 return
5342
5343 import ctypes
5344 import ctypes.wintypes
5345 import msvcrt
5346
5347 ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
5348
5349 dll = ctypes.WinDLL('kernel32', use_last_error=False)
5350 handle = os.open('CONOUT$', os.O_RDWR)
5351 try:
5352 h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
5353 dw_original_mode = ctypes.wintypes.DWORD()
5354 success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
5355 if not success:
5356 raise Exception('GetConsoleMode failed')
5357
5358 success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
5359 dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
5360 if not success:
5361 raise Exception('SetConsoleMode failed')
5362 finally:
5363 os.close(handle)
5364
5365 global WINDOWS_VT_MODE
5366 WINDOWS_VT_MODE = True
5367 supports_terminal_sequences.cache_clear()
5368
5369
5370 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5371
5372
5373 def remove_terminal_sequences(string):
5374 return _terminal_sequences_re.sub('', string)
5375
5376
5377 def number_of_digits(number):
5378 return len('%d' % number)
5379
5380
5381 def join_nonempty(*values, delim='-', from_dict=None):
5382 if from_dict is not None:
5383 values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
5384 return delim.join(map(str, filter(None, values)))
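# Illustrative example (not part of the original source): None and empty
# values are dropped, everything else is stringified:
#   >>> join_nonempty('mp4', None, '', 1080, delim='-')
#   'mp4-1080'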
5385
5386
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace it with the largest format width
    * Update the thumbnail's dimensions to match

    This function is useful for video services that scale the provided thumbnails on demand
    """
5395 _keys = ('width', 'height')
5396 max_dimensions = max(
5397 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5398 default=(0, 0))
5399 if not max_dimensions[0]:
5400 return thumbnails
5401 return [
5402 merge_dicts(
5403 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5404 dict(zip(_keys, max_dimensions)), thumbnail)
5405 for thumbnail in thumbnails
5406 ]
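
# Illustration with made-up data: given a format of width 1920 and a thumbnail
# with URL 'https://example.com/thumb_320.jpg',
#
#   scale_thumbnails_to_max_format_width(formats, thumbnails, r'\d+(?=\.jpg)')
#
# rewrites the URL to 'https://example.com/thumb_1920.jpg' and overwrites the
# thumbnail's width/height with the format's dimensions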
5407
5408
5409 def parse_http_range(range):
5410 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5411 if not range:
5412 return None, None, None
5413 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5414 if not crg:
5415 return None, None, None
5416 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
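
# For example:
#
#   parse_http_range('bytes 0-499/1234') == (0, 499, 1234)
#   parse_http_range('bytes=500-') == (500, None, None)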
5417
5418
5419 def read_stdin(what):
5420 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5421 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5422 return sys.stdin
5423
5424
5425 def determine_file_encoding(data):
5426 """
5427 Detect the text encoding used
5428 @returns (encoding, bytes to skip)
5429 """
5430
    # BOMs are given priority over encoding declarations
5432 for bom, enc in BOMS:
5433 if data.startswith(bom):
5434 return enc, len(bom)
5435
5436 # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5437 # We ignore the endianness to get a good enough match
5438 data = data.replace(b'\0', b'')
5439 mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5440 return mobj.group(1).decode() if mobj else None, 0
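
# For example (BOMS is defined earlier in this module):
#
#   determine_file_encoding(b'\xef\xbb\xbf# coding: latin-1') == ('utf-8', 3)  # BOM wins
#   determine_file_encoding(b'# coding: utf-8\n-v') == ('utf-8', 0)  # declaration, nothing to skip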
5441
5442
5443 class Config:
5444 own_args = None
5445 parsed_args = None
5446 filename = None
5447 __initialized = False
5448
5449 def __init__(self, parser, label=None):
5450 self.parser, self.label = parser, label
5451 self._loaded_paths, self.configs = set(), []
5452
5453 def init(self, args=None, filename=None):
5454 assert not self.__initialized
5455 self.own_args, self.filename = args, filename
5456 return self.load_configs()
5457
5458 def load_configs(self):
5459 directory = ''
5460 if self.filename:
5461 location = os.path.realpath(self.filename)
5462 directory = os.path.dirname(location)
5463 if location in self._loaded_paths:
5464 return False
5465 self._loaded_paths.add(location)
5466
5467 self.__initialized = True
5468 opts, _ = self.parser.parse_known_args(self.own_args)
5469 self.parsed_args = self.own_args
5470 for location in opts.config_locations or []:
5471 if location == '-':
5472 if location in self._loaded_paths:
5473 continue
5474 self._loaded_paths.add(location)
5475 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5476 continue
5477 location = os.path.join(directory, expand_path(location))
5478 if os.path.isdir(location):
5479 location = os.path.join(location, 'yt-dlp.conf')
5480 if not os.path.exists(location):
5481 self.parser.error(f'config location {location} does not exist')
5482 self.append_config(self.read_file(location), location)
5483 return True
5484
5485 def __str__(self):
5486 label = join_nonempty(
5487 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5488 delim=' ')
5489 return join_nonempty(
5490 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5491 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5492 delim='\n')
5493
5494 @staticmethod
5495 def read_file(filename, default=[]):
5496 try:
5497 optionf = open(filename, 'rb')
5498 except OSError:
5499 return default # silently skip if file is not present
5500 try:
5501 enc, skip = determine_file_encoding(optionf.read(512))
5502 optionf.seek(skip, io.SEEK_SET)
5503 except OSError:
5504 enc = None # silently skip read errors
5505 try:
5506 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5507 contents = optionf.read().decode(enc or preferredencoding())
5508 res = shlex.split(contents, comments=True)
5509 except Exception as err:
5510 raise ValueError(f'Unable to parse "{filename}": {err}')
5511 finally:
5512 optionf.close()
5513 return res
5514
5515 @staticmethod
5516 def hide_login_info(opts):
5517 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5518 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5519
5520 def _scrub_eq(o):
5521 m = eqre.match(o)
5522 if m:
5523 return m.group('key') + '=PRIVATE'
5524 else:
5525 return o
5526
5527 opts = list(map(_scrub_eq, opts))
5528 for idx, opt in enumerate(opts):
5529 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5530 opts[idx + 1] = 'PRIVATE'
5531 return opts
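
    # For example:
    #
    #   Config.hide_login_info(['-u', 'alice', '--password=hunter2'])
    #   == ['-u', 'PRIVATE', '--password=PRIVATE']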
5532
5533 def append_config(self, *args, label=None):
5534 config = type(self)(self.parser, label)
5535 config._loaded_paths = self._loaded_paths
5536 if config.init(*args):
5537 self.configs.append(config)
5538
5539 @property
5540 def all_args(self):
5541 for config in reversed(self.configs):
5542 yield from config.all_args
5543 yield from self.parsed_args or []
5544
5545 def parse_known_args(self, **kwargs):
5546 return self.parser.parse_known_args(self.all_args, **kwargs)
5547
5548 def parse_args(self):
5549 return self.parser.parse_args(self.all_args)
5550
5551
5552 class WebSocketsWrapper:
5553 """Wraps websockets module to use in non-async scopes"""
5554 pool = None
5555
5556 def __init__(self, url, headers=None, connect=True):
5557 self.loop = asyncio.new_event_loop()
        # XXX: the "loop" parameter is deprecated in newer versions of websockets
5559 self.conn = websockets.connect(
5560 url, extra_headers=headers, ping_interval=None,
5561 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5562 if connect:
5563 self.__enter__()
5564 atexit.register(self.__exit__, None, None, None)
5565
5566 def __enter__(self):
5567 if not self.pool:
5568 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5569 return self
5570
5571 def send(self, *args):
5572 self.run_with_loop(self.pool.send(*args), self.loop)
5573
5574 def recv(self, *args):
5575 return self.run_with_loop(self.pool.recv(*args), self.loop)
5576
    def __exit__(self, type, value, traceback):
        try:
            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
        finally:
            # cancel any remaining tasks before closing the loop; closing first
            # would make _cancel_all_tasks fail with "Event loop is closed"
            self._cancel_all_tasks(self.loop)
            self.loop.close()
5583
    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
    # for contributors: if any new library that uses asyncio needs to be run in non-async code,
    # move these functions out of this class
5586 @staticmethod
5587 def run_with_loop(main, loop):
5588 if not asyncio.iscoroutine(main):
5589 raise ValueError(f'a coroutine was expected, got {main!r}')
5590
5591 try:
5592 return loop.run_until_complete(main)
5593 finally:
5594 loop.run_until_complete(loop.shutdown_asyncgens())
5595 if hasattr(loop, 'shutdown_default_executor'):
5596 loop.run_until_complete(loop.shutdown_default_executor())
5597
5598 @staticmethod
5599 def _cancel_all_tasks(loop):
5600 to_cancel = asyncio.all_tasks(loop)
5601
5602 if not to_cancel:
5603 return
5604
5605 for task in to_cancel:
5606 task.cancel()
5607
        # `asyncio.gather`'s "loop" parameter was removed in Python 3.10+;
        # the tasks are already bound to `loop`, so gather picks it up from them
        loop.run_until_complete(
            asyncio.gather(*to_cancel, return_exceptions=True))
5611
5612 for task in to_cancel:
5613 if task.cancelled():
5614 continue
5615 if task.exception() is not None:
5616 loop.call_exception_handler({
5617 'message': 'unhandled exception during asyncio.run() shutdown',
5618 'exception': task.exception(),
5619 'task': task,
5620 })
5621
5622
5623 def merge_headers(*dicts):
5624 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5625 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
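
# For example (later dicts win, whatever the casing):
#
#   merge_headers({'user-agent': 'A'}, {'User-Agent': 'B'}) == {'User-Agent': 'B'}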
5626
5627
5628 def cached_method(f):
5629 """Cache a method"""
5630 signature = inspect.signature(f)
5631
5632 @functools.wraps(f)
5633 def wrapper(self, *args, **kwargs):
5634 bound_args = signature.bind(self, *args, **kwargs)
5635 bound_args.apply_defaults()
5636 key = tuple(bound_args.arguments.values())[1:]
5637
5638 cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
5639 if key not in cache:
5640 cache[key] = f(self, *args, **kwargs)
5641 return cache[key]
5642 return wrapper
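
# Usage sketch (`Media` and `fetch_metadata` are hypothetical):
#
#   class Media:
#       @cached_method
#       def fetch_metadata(self, video_id):
#           ...  # runs once per (instance, video_id); repeated calls hit the cache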
5643
5644
5645 class classproperty:
5646 """property access for class methods with optional caching"""
5647 def __new__(cls, func=None, *args, **kwargs):
5648 if not func:
5649 return functools.partial(cls, *args, **kwargs)
5650 return super().__new__(cls)
5651
5652 def __init__(self, func, *, cache=False):
5653 functools.update_wrapper(self, func)
5654 self.func = func
5655 self._cache = {} if cache else None
5656
5657 def __get__(self, _, cls):
5658 if self._cache is None:
5659 return self.func(cls)
5660 elif cls not in self._cache:
5661 self._cache[cls] = self.func(cls)
5662 return self._cache[cls]
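
# Usage sketch (`MyIE` is a hypothetical class here):
#
#   class MyIE:
#       @classproperty(cache=True)
#       def IE_NAME(cls):
#           return cls.__name__  # evaluated once per class, then cached
#
#   MyIE.IE_NAME == 'MyIE'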
5663
5664
5665 class function_with_repr:
5666 def __init__(self, func, repr_=None):
5667 functools.update_wrapper(self, func)
5668 self.func, self.__repr = func, repr_
5669
5670 def __call__(self, *args, **kwargs):
5671 return self.func(*args, **kwargs)
5672
5673 def __repr__(self):
5674 if self.__repr:
5675 return self.__repr
5676 return f'{self.func.__module__}.{self.func.__qualname__}'
5677
5678
5679 class Namespace(types.SimpleNamespace):
5680 """Immutable namespace"""
5681
5682 def __iter__(self):
5683 return iter(self.__dict__.values())
5684
5685 @property
5686 def items_(self):
5687 return self.__dict__.items()
5688
5689
5690 MEDIA_EXTENSIONS = Namespace(
5691 common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5692 video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5693 common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5694 audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
5695 thumbnails=('jpg', 'png', 'webp'),
5696 storyboards=('mhtml', ),
5697 subtitles=('srt', 'vtt', 'ass', 'lrc'),
5698 manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5699 )
5700 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5701 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5702
5703 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5704
5705
5706 class RetryManager:
5707 """Usage:
5708 for retry in RetryManager(...):
5709 try:
5710 ...
5711 except SomeException as err:
5712 retry.error = err
5713 continue
5714 """
5715 attempt, _error = 0, None
5716
5717 def __init__(self, _retries, _error_callback, **kwargs):
5718 self.retries = _retries or 0
5719 self.error_callback = functools.partial(_error_callback, **kwargs)
5720
5721 def _should_retry(self):
5722 return self._error is not NO_DEFAULT and self.attempt <= self.retries
5723
5724 @property
5725 def error(self):
5726 if self._error is NO_DEFAULT:
5727 return None
5728 return self._error
5729
5730 @error.setter
5731 def error(self, value):
5732 self._error = value
5733
5734 def __iter__(self):
5735 while self._should_retry():
5736 self.error = NO_DEFAULT
5737 self.attempt += 1
5738 yield self
5739 if self.error:
5740 self.error_callback(self.error, self.attempt, self.retries)
5741
5742 @staticmethod
5743 def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5744 """Utility function for reporting retries"""
5745 if count > retries:
5746 if error:
5747 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5748 raise e
5749
5750 if not count:
5751 return warn(e)
5752 elif isinstance(e, ExtractorError):
5753 e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
5754 warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5755
5756 delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5757 if delay:
5758 info(f'Sleeping {delay:.2f} seconds ...')
5759 time.sleep(delay)
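
    # A fuller wiring sketch combining the iterator with report_retry
    # (`fetch` and `logger` are hypothetical):
    #
    #   for retry in RetryManager(3, RetryManager.report_retry, sleep_func=1,
    #                             info=logger.info, warn=logger.warning):
    #       try:
    #           fetch()
    #       except OSError as err:
    #           retry.error = err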
5760
5761
5762 def make_archive_id(ie, video_id):
5763 ie_key = ie if isinstance(ie, str) else ie.ie_key()
5764 return f'{ie_key.lower()} {video_id}'
5765
5766
5767 def truncate_string(s, left, right=0):
5768 assert left > 3 and right >= 0
5769 if s is None or len(s) <= left + right:
5770 return s
5771 return f'{s[:left-3]}...{s[-right:] if right else ""}'
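
# For example:
#
#   truncate_string('abcdefghij', 5) == 'ab...'
#   truncate_string('abcdefghij', 5, 2) == 'ab...ij'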
5772
5773
5774 def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
5775 assert 'all' in alias_dict, '"all" alias is required'
5776 requested = list(start or [])
5777 for val in options:
5778 discard = val.startswith('-')
5779 if discard:
5780 val = val[1:]
5781
5782 if val in alias_dict:
5783 val = alias_dict[val] if not discard else [
5784 i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
5785 # NB: Do not allow regex in aliases for performance
5786 requested = orderedSet_from_options(val, alias_dict, start=requested)
5787 continue
5788
5789 current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
5790 else [val] if val in alias_dict['all'] else None)
5791 if current is None:
5792 raise ValueError(val)
5793
5794 if discard:
5795 for item in current:
5796 while item in requested:
5797 requested.remove(item)
5798 else:
5799 requested.extend(current)
5800
5801 return orderedSet(requested)
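
# Illustration with a made-up alias_dict:
#
#   aliases = {'all': ['info', 'comments', 'thumbs'], 'default': ['info', 'comments']}
#   orderedSet_from_options(['default', '-comments'], aliases) == ['info']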
5802
5803
5804 class FormatSorter:
5805 regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
5806
5807 default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
5808 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
5809 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
5810 ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
5811 'height', 'width', 'proto', 'vext', 'abr', 'aext',
5812 'fps', 'fs_approx', 'source', 'id')
5813
5814 settings = {
5815 'vcodec': {'type': 'ordered', 'regex': True,
5816 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
5817 'acodec': {'type': 'ordered', 'regex': True,
5818 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
5819 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
5820 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
5821 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
5822 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
5823 'vext': {'type': 'ordered', 'field': 'video_ext',
5824 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
5825 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
5826 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
5827 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
5828 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
5829 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
5830 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
5831 'field': ('vcodec', 'acodec'),
5832 'function': lambda it: int(any(v != 'none' for v in it))},
5833 'ie_pref': {'priority': True, 'type': 'extractor'},
5834 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
5835 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
5836 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
5837 'quality': {'convert': 'float', 'default': -1},
5838 'filesize': {'convert': 'bytes'},
5839 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
5840 'id': {'convert': 'string', 'field': 'format_id'},
5841 'height': {'convert': 'float_none'},
5842 'width': {'convert': 'float_none'},
5843 'fps': {'convert': 'float_none'},
5844 'channels': {'convert': 'float_none', 'field': 'audio_channels'},
5845 'tbr': {'convert': 'float_none'},
5846 'vbr': {'convert': 'float_none'},
5847 'abr': {'convert': 'float_none'},
5848 'asr': {'convert': 'float_none'},
5849 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
5850
5851 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
5852 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
5853 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
5854 'ext': {'type': 'combined', 'field': ('vext', 'aext')},
5855 'res': {'type': 'multiple', 'field': ('height', 'width'),
5856 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
5857
5858 # Actual field names
5859 'format_id': {'type': 'alias', 'field': 'id'},
5860 'preference': {'type': 'alias', 'field': 'ie_pref'},
5861 'language_preference': {'type': 'alias', 'field': 'lang'},
5862 'source_preference': {'type': 'alias', 'field': 'source'},
5863 'protocol': {'type': 'alias', 'field': 'proto'},
5864 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
5865 'audio_channels': {'type': 'alias', 'field': 'channels'},
5866
5867 # Deprecated
5868 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
5869 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
5870 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
5871 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
5872 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
5873 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
5874 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
5875 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
5876 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
5877 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
5878 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
5879 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
5880 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
5881 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
5882 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
5883 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
5884 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
5885 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
5886 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
5887 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
5888 }
5889
5890 def __init__(self, ydl, field_preference):
5891 self.ydl = ydl
5892 self._order = []
5893 self.evaluate_params(self.ydl.params, field_preference)
5894 if ydl.params.get('verbose'):
5895 self.print_verbose_info(self.ydl.write_debug)
5896
5897 def _get_field_setting(self, field, key):
5898 if field not in self.settings:
5899 if key in ('forced', 'priority'):
5900 return False
5901 self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
5902 'deprecated and may be removed in a future version')
5903 self.settings[field] = {}
5904 propObj = self.settings[field]
5905 if key not in propObj:
5906 type = propObj.get('type')
5907 if key == 'field':
5908 default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
5909 elif key == 'convert':
5910 default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
5911 else:
5912 default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
5913 propObj[key] = default
5914 return propObj[key]
5915
5916 def _resolve_field_value(self, field, value, convertNone=False):
5917 if value is None:
5918 if not convertNone:
5919 return None
5920 else:
5921 value = value.lower()
5922 conversion = self._get_field_setting(field, 'convert')
5923 if conversion == 'ignore':
5924 return None
5925 if conversion == 'string':
5926 return value
5927 elif conversion == 'float_none':
5928 return float_or_none(value)
5929 elif conversion == 'bytes':
5930 return parse_bytes(value)
5931 elif conversion == 'order':
5932 order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
5933 use_regex = self._get_field_setting(field, 'regex')
5934 list_length = len(order_list)
5935 empty_pos = order_list.index('') if '' in order_list else list_length + 1
5936 if use_regex and value is not None:
5937 for i, regex in enumerate(order_list):
5938 if regex and re.match(regex, value):
5939 return list_length - i
5940 return list_length - empty_pos # not in list
            else:  # not a regex, or value is None
5942 return list_length - (order_list.index(value) if value in order_list else empty_pos)
5943 else:
5944 if value.isnumeric():
5945 return float(value)
5946 else:
5947 self.settings[field]['convert'] = 'string'
5948 return value
5949
5950 def evaluate_params(self, params, sort_extractor):
5951 self._use_free_order = params.get('prefer_free_formats', False)
5952 self._sort_user = params.get('format_sort', [])
5953 self._sort_extractor = sort_extractor
5954
5955 def add_item(field, reverse, closest, limit_text):
5956 field = field.lower()
5957 if field in self._order:
5958 return
5959 self._order.append(field)
5960 limit = self._resolve_field_value(field, limit_text)
5961 data = {
5962 'reverse': reverse,
5963 'closest': False if limit is None else closest,
5964 'limit_text': limit_text,
5965 'limit': limit}
5966 if field in self.settings:
5967 self.settings[field].update(data)
5968 else:
5969 self.settings[field] = data
5970
5971 sort_list = (
5972 tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
5973 + (tuple() if params.get('format_sort_force', False)
5974 else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
5975 + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
5976
5977 for item in sort_list:
5978 match = re.match(self.regex, item)
5979 if match is None:
5980 raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
5981 field = match.group('field')
5982 if field is None:
5983 continue
5984 if self._get_field_setting(field, 'type') == 'alias':
5985 alias, field = field, self._get_field_setting(field, 'field')
5986 if self._get_field_setting(alias, 'deprecated'):
5987 self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
5988 f'be removed in a future version. Please use {field} instead')
5989 reverse = match.group('reverse') is not None
5990 closest = match.group('separator') == '~'
5991 limit_text = match.group('limit')
5992
5993 has_limit = limit_text is not None
5994 has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
5995 has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
5996
5997 fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
5998 limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
5999 limit_count = len(limits)
6000 for (i, f) in enumerate(fields):
6001 add_item(f, reverse, closest,
6002 limits[i] if i < limit_count
6003 else limits[0] if has_limit and not has_multiple_limits
6004 else None)
6005
6006 def print_verbose_info(self, write_debug):
6007 if self._sort_user:
6008 write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
6009 if self._sort_extractor:
6010 write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
6011 write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
6012 '+' if self._get_field_setting(field, 'reverse') else '', field,
6013 '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
6014 self._get_field_setting(field, 'limit_text'),
6015 self._get_field_setting(field, 'limit'))
6016 if self._get_field_setting(field, 'limit_text') is not None else '')
6017 for field in self._order if self._get_field_setting(field, 'visible')]))
6018
6019 def _calculate_field_preference_from_value(self, format, field, type, value):
6020 reverse = self._get_field_setting(field, 'reverse')
6021 closest = self._get_field_setting(field, 'closest')
6022 limit = self._get_field_setting(field, 'limit')
6023
6024 if type == 'extractor':
6025 maximum = self._get_field_setting(field, 'max')
6026 if value is None or (maximum is not None and value >= maximum):
6027 value = -1
6028 elif type == 'boolean':
6029 in_list = self._get_field_setting(field, 'in_list')
6030 not_in_list = self._get_field_setting(field, 'not_in_list')
6031 value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
6032 elif type == 'ordered':
6033 value = self._resolve_field_value(field, value, True)
6034
6035 # try to convert to number
6036 val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
6037 is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
6038 if is_num:
6039 value = val_num
6040
6041 return ((-10, 0) if value is None
6042 else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
6043 else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
6044 else (0, value, 0) if not reverse and (limit is None or value <= limit)
6045 else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
6046 else (-1, value, 0))
6047
6048 def _calculate_field_preference(self, format, field):
6049 type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
6050 get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
6051 if type == 'multiple':
6052 type = 'field' # Only 'field' is allowed in multiple for now
6053 actual_fields = self._get_field_setting(field, 'field')
6054
6055 value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
6056 else:
6057 value = get_value(field)
6058 return self._calculate_field_preference_from_value(format, field, type, value)
6059
6060 def calculate_preference(self, format):
6061 # Determine missing protocol
6062 if not format.get('protocol'):
6063 format['protocol'] = determine_protocol(format)
6064
6065 # Determine missing ext
6066 if not format.get('ext') and 'url' in format:
6067 format['ext'] = determine_ext(format['url'])
6068 if format.get('vcodec') == 'none':
6069 format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
6070 format['video_ext'] = 'none'
6071 else:
6072 format['video_ext'] = format['ext']
6073 format['audio_ext'] = 'none'
6074 # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
6075 # format['preference'] = -1000
6076
6077 if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
            # HEVC-over-FLV is not allowed by FLV's original spec
6079 # ref. https://trac.ffmpeg.org/ticket/6389
6080 # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
6081 format['preference'] = -100
6082
        # Determine missing bitrates
        if format.get('tbr') is None:
            if format.get('vbr') is not None and format.get('abr') is not None:
                format['tbr'] = format['vbr'] + format['abr']
        else:
            # guard with `or 0` since the keys may be present with a None value
            if format.get('vcodec') != 'none' and format.get('vbr') is None:
                format['vbr'] = format['tbr'] - (format.get('abr') or 0)
            if format.get('acodec') != 'none' and format.get('abr') is None:
                format['abr'] = format['tbr'] - (format.get('vbr') or 0)
6092
6093 return tuple(self._calculate_field_preference(format, field) for field in self._order)
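
    # Roughly how this class is consumed elsewhere in yt-dlp (sketch; `ydl` is
    # a YoutubeDL instance and `field_preference` the extractor's sort order):
    #
    #   sorter = FormatSorter(ydl, field_preference)
    #   formats.sort(key=sorter.calculate_preference)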