import asyncio
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import collections.abc
import contextlib
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import importlib.util
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
import zlib

from .compat import functools  # isort: split
from .compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
)
from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in urllib.parse.uses_netloc:
            urllib.parse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise
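# Illustrative usage of write_json_file (a sketch, not part of the module):
#   write_json_file({'id': 'abc', 'title': 'Example'}, 'info.json')
# The JSON is written to a temporary file next to 'info.json' and then renamed
# into place, so a crash mid-write cannot leave a half-written file behind.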


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
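# Illustrative usage (a sketch, not part of the module): expanding a prefixed
# XPath into the Clark notation used by xml.etree.ElementTree
#   >>> xpath_with_ns('media:thumbnail', {'media': 'http://search.yahoo.com/mrss/'})
#   '{http://search.yahoo.com/mrss/}thumbnail'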


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, str):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None
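# Illustrative usage (a sketch, not part of the module):
#   >>> get_element_by_class('title', '<span class="title main">Hello</span>')
#   'Hello'
# The class may appear anywhere within a whitespace-separated class list.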


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of all tags with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of all tags with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Return the text (content) and the html (whole) of all tags with the specified
    attribute in the passed HTML document
    """
    if not value:
        return

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
487 """
488 For the first element with the specified tag in the passed HTML document
489 return its' content (text) and the whole element (html)
490 """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')


class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        html.parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
        raise compat_HTMLParseError('done')


class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs
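# Illustrative usage (a sketch, not part of the module):
#   >>> extract_attributes('<a href="page.html" data-id=1 hidden>')
#   {'href': 'page.html', 'data-id': '1', 'hidden': None}
# Attribute names are lowercased and entity references in values are decoded.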


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of their attribute dictionaries"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
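# Illustrative usage (a sketch, not part of the module):
#   >>> clean_html('<p>Hello <b>world</b></p><p>bye</p>')
#   'Hello world\nbye'
# <br> and paragraph boundaries become newlines; all other tags are stripped.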


class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        try:
            if self.ignore_extra:
                return self.raw_decode(s.lstrip())[0]
            return super().decode(s)
        except json.JSONDecodeError as e:
            if e.pos is not None:
                raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
            raise
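# Illustrative usage (a sketch, not part of the module): tolerate trailing
# garbage after the JSON value, as often found in scraped <script> blocks
#   >>> LenientJSONDecoder(ignore_extra=True).decode('{"status": "ok"} </script>')
#   {'status': 'ok'}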


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
            with contextlib.suppress(io.UnsupportedOperation):
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
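# Illustrative usage (a sketch, not part of the module):
#   >>> timeconvert('Wed, 14 Sep 2022 00:00:00 +0000')
#   1663113600
# Returns None when the string is not a valid RFC 2822 date.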


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it can be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
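# Illustrative behaviour (a sketch, not part of the module):
#   >>> sanitize_filename('A/B: C', restricted=True)
#   'A_B_-_C'
# With the default arguments, characters such as '/', ':' and '?' are instead
# mapped to full-width Unicode look-alikes rather than removed or replaced.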


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url, *, scheme='http'):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return f'{scheme}:{url}'
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
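# Illustrative usage (a sketch, not part of the module):
#   >>> sanitize_url('//example.com/video')
#   'http://example.com/video'
#   >>> sanitize_url('httpss://example.com')
#   'https://example.com'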


def extract_basic_auth(url):
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = urllib.parse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
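# Illustrative usage (a sketch, not part of the module): credentials embedded
# in the URL are moved into a ready-to-use Authorization header value
#   >>> extract_basic_auth('http://user:pass@example.com/feed')
#   ('http://example.com/feed', 'Basic dXNlcjpwYXNz')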


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return urllib.request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _iter():
        seen = []  # Do not use set since the items can be unhashable
        for x in iterable:
            if x not in seen:
                seen.append(x)
                yield x

    return _iter() if lazy else list(_iter())
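# Illustrative usage (a sketch, not part of the module):
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]
# With lazy=True a generator is returned instead of a list.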


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon.
    # E.g. '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
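# Illustrative usage (a sketch, not part of the module): named, decimal and
# hexadecimal entity references are all handled
#   >>> unescapeHTML('&eacute;'), unescapeHTML('&#39;'), unescapeHTML('&#x26;')
#   ('é', "'", '&')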


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
                        f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    @staticmethod
    def _fix_pyinstaller_ld_path(env):
        """Restore LD_LIBRARY_PATH when using PyInstaller
            Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
                 https://github.com/yt-dlp/yt-dlp/issues/4573
        """
        if not hasattr(sys, '_MEIPASS'):
            return

        def _fix(key):
            orig = env.get(f'{key}_ORIG')
            if orig is None:
                env.pop(key, None)
            else:
                env[key] = orig

        _fix('LD_LIBRARY_PATH')  # Linux
        _fix('DYLD_LIBRARY_PATH')  # macOS

    def __init__(self, *args, env=None, text=False, **kwargs):
        if env is None:
            env = os.environ.copy()
        self._fix_pyinstaller_ld_path(env)

        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, timeout=None, **kwargs):
        with cls(*args, **kwargs) as proc:
            default = '' if proc.text_mode else b''
            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
            return stdout or default, stderr or default, proc.returncode
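# Illustrative usage of Popen.run (a sketch, not part of the module; assumes
# an `ffprobe` binary is available on PATH):
#   stdout, stderr, returncode = Popen.run(
#       ['ffprobe', '-version'], text=True,
#       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# The process is killed if communicate() raises, e.g. on KeyboardInterrupt.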


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
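# Illustrative usage (a sketch, not part of the module):
#   >>> timetuple_from_msec(90061001)
#   Time(hours=25, minutes=1, seconds=1, milliseconds=1)
#   >>> formatSeconds(3661)
#   '1:01:01'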


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')
    elif (
        sys.version_info < (3, 10)
        and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
        and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
    ):
        # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
        # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
        # in some situations [2][3].
        # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
        # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
        # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
        # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
        # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
        # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
        # 4. https://peps.python.org/pep-0644/
        # 5. https://peps.python.org/pep-0644/#libressl-support
        # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
        context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
        context.minimum_version = ssl.TLSVersion.TLSv1_2

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    from .update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info

        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg or 'The channel is not currently live', **kwargs)


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This works around _create_connection() from socket, which tries all
        # address data from getaddrinfo(), including IPv6. This filters the result of
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = http.client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = urllib.request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # Per RFC 2616, the default charset is iso-8859-1, which Python 3 respects
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, http.client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or http.client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


def is_path_like(f):
    return isinstance(f, (str, bytes, os.PathLike))


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
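# Illustrative usage (a sketch, not part of the module; assumes 'cookies.txt'
# is a Netscape-format cookie file):
#   jar = YoutubeDLCookieJar('cookies.txt')
#   jar.load(ignore_discard=True, ignore_expires=True)
#   jar.save()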
1668
1669
1670 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
1671 def __init__(self, cookiejar=None):
1672 urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
1673
1674 def http_response(self, request, response):
1675 return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
1676
1677 https_request = urllib.request.HTTPCookieProcessor.http_request
1678 https_response = http_response
1679
1680
1681 class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
1682 """YoutubeDL redirect handler
1683
1684 The code is based on HTTPRedirectHandler implementation from CPython [1].
1685
1686 This redirect handler solves two issues:
1687 - ensures redirect URL is always unicode under python 2
1688 - introduces support for experimental HTTP response status code
1689 308 Permanent Redirect [2] used by some sites [3]
1690
1691 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1692 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1693 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1694 """
1695
1696 http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
1697
1698 def redirect_request(self, req, fp, code, msg, headers, newurl):
1699 """Return a Request or None in response to a redirect.
1700
1701 This is called by the http_error_30x methods when a
1702 redirection response is received. If a redirection should
1703 take place, return a new Request to allow http_error_30x to
1704 perform the redirect. Otherwise, raise HTTPError if no-one
1705 else should try to handle this url. Return None if you can't
1706 but another Handler might.
1707 """
1708 m = req.get_method()
1709 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1710 or code in (301, 302, 303) and m == "POST")):
1711 raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
1712 # Strictly (according to RFC 2616), 301 or 302 in response to
1713 # a POST MUST NOT cause a redirection without confirmation
1714 # from the user (of urllib.request, in this case). In practice,
1715 # essentially all clients do redirect in this case, so we do
1716 # the same.
1717
1718 # Be conciliant with URIs containing a space. This is mainly
1719 # redundant with the more complete encoding done in http_error_302(),
1720 # but it is kept for compatibility with other callers.
1721 newurl = newurl.replace(' ', '%20')
1722
1723 CONTENT_HEADERS = ("content-length", "content-type")
1724 # Strip the Content-* headers, as they do not apply to the redirected request
1725 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1726
1727 # A 303 must either use GET or HEAD for subsequent request
1728 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1729 if code == 303 and m != 'HEAD':
1730 m = 'GET'
1731 # 301 and 302 redirects are commonly turned into a GET from a POST
1732 # for subsequent requests by browsers, so we'll do the same.
1733 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1734 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1735 if code in (301, 302) and m == 'POST':
1736 m = 'GET'
1737
1738 return urllib.request.Request(
1739 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1740 unverifiable=True, method=m)
1741
1742
1743 def extract_timezone(date_str):
1744 m = re.search(
1745 r'''(?x)
1746 ^.{8,}? # >=8 char non-TZ prefix, if present
1747 (?P<tz>Z| # just the UTC Z, or
1748 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1749 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1750 [ ]? # optional space
1751 (?P<sign>\+|-) # +/-
1752 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1753 $)
1754 ''', date_str)
1755 if not m:
1756 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1757 timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
1758 if timezone is not None:
1759 date_str = date_str[:-len(m.group('tz'))]
1760 timezone = datetime.timedelta(hours=timezone or 0)
1761 else:
1762 date_str = date_str[:-len(m.group('tz'))]
1763 if not m.group('sign'):
1764 timezone = datetime.timedelta()
1765 else:
1766 sign = 1 if m.group('sign') == '+' else -1
1767 timezone = datetime.timedelta(
1768 hours=sign * int(m.group('hours')),
1769 minutes=sign * int(m.group('minutes')))
1770 return timezone, date_str
1771
1772
1773 def parse_iso8601(date_str, delimiter='T', timezone=None):
1774 """ Return a UNIX timestamp from the given date """
1775
1776 if date_str is None:
1777 return None
1778
1779 date_str = re.sub(r'\.[0-9]+', '', date_str)
1780
1781 if timezone is None:
1782 timezone, date_str = extract_timezone(date_str)
1783
1784 with contextlib.suppress(ValueError):
1785 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1786 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1787 return calendar.timegm(dt.timetuple())
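

# Illustrative usage (a minimal sketch; `_example_parse_iso8601` is a
# hypothetical name, not part of the API). An explicit offset is subtracted
# to produce a UTC timestamp, and fractional seconds are discarded:
def _example_parse_iso8601():
    assert parse_iso8601('1970-01-01T00:00:00Z') == 0
    assert parse_iso8601('1970-01-01T01:00:00+01:00') == 0
    assert parse_iso8601('1970-01-01T00:00:00.123Z') == 0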
1788
1789
1790 def date_formats(day_first=True):
1791 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1792
1793
1794 def unified_strdate(date_str, day_first=True):
1795 """Return a string with the date in the format YYYYMMDD"""
1796
1797 if date_str is None:
1798 return None
1799 upload_date = None
1800 # Replace commas
1801 date_str = date_str.replace(',', ' ')
1802 # Remove AM/PM + timezone
1803 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1804 _, date_str = extract_timezone(date_str)
1805
1806 for expression in date_formats(day_first):
1807 with contextlib.suppress(ValueError):
1808 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1809 if upload_date is None:
1810 timetuple = email.utils.parsedate_tz(date_str)
1811 if timetuple:
1812 with contextlib.suppress(ValueError):
1813 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1814 if upload_date is not None:
1815 return str(upload_date)
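

# A small usage sketch (`_example_unified_strdate` is hypothetical, not part
# of the API); assumes the DATE_FORMATS tables defined earlier in the module:
def _example_unified_strdate():
    assert unified_strdate('December 21, 2010') == '20101221'
    assert unified_strdate('1968-12-10') == '19681210'
    # The timezone suffix is stripped before the formats are tried
    assert unified_strdate('28/01/2014 21:00:00 +0100') == '20140128'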
1816
1817
1818 def unified_timestamp(date_str, day_first=True):
1819 if date_str is None:
1820 return None
1821
1822 date_str = re.sub(r'\s+', ' ', re.sub(
1823 r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
1824
1825 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1826 timezone, date_str = extract_timezone(date_str)
1827
1828 # Remove AM/PM + timezone
1829 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1830
1831 # Remove unrecognized timezones from ISO 8601 alike timestamps
1832 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1833 if m:
1834 date_str = date_str[:-len(m.group('tz'))]
1835
1836 # Python only supports microseconds, so remove nanoseconds
1837 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1838 if m:
1839 date_str = m.group(1)
1840
1841 for expression in date_formats(day_first):
1842 with contextlib.suppress(ValueError):
1843 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1844 return calendar.timegm(dt.timetuple())
1845
1846 timetuple = email.utils.parsedate_tz(date_str)
1847 if timetuple:
1848 return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
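

# A small usage sketch (`_example_unified_timestamp` is hypothetical, not part
# of the API); assumes the DATE_FORMATS and TIMEZONE_NAMES tables defined
# earlier in the module:
def _example_unified_timestamp():
    assert unified_timestamp('1970/01/01 00:00:00') == 0
    # Named timezones are resolved via TIMEZONE_NAMES (EDT is UTC-4)
    assert unified_timestamp('December 31 1969 20:00:01 EDT') == 1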
1849
1850
1851 def determine_ext(url, default_ext='unknown_video'):
1852 if url is None or '.' not in url:
1853 return default_ext
1854 guess = url.partition('?')[0].rpartition('.')[2]
1855 if re.match(r'^[A-Za-z0-9]+$', guess):
1856 return guess
1857 # Try to extract the ext from URLs like http://example.com/foo/bar.mp4/?download
1858 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1859 return guess.rstrip('/')
1860 else:
1861 return default_ext
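

# Illustrative behaviour (`_example_determine_ext` is a hypothetical name, not
# part of the API); the second case relies on KNOWN_EXTENSIONS defined
# elsewhere in the module:
def _example_determine_ext():
    assert determine_ext('http://example.com/video.mp4?download=1') == 'mp4'
    assert determine_ext('http://example.com/foo/bar.mp4/?download') == 'mp4'
    assert determine_ext('http://example.com/page', default_ext='html') == 'html'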
1862
1863
1864 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1865 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1866
1867
1868 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1869 R"""
1870 Return a datetime object from a string.
1871 Supported format:
1872 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1873
1874 @param format strftime format of DATE
1875 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1876 auto: round to the unit provided in date_str (if applicable).
1877 """
1878 auto_precision = False
1879 if precision == 'auto':
1880 auto_precision = True
1881 precision = 'microsecond'
1882 today = datetime_round(datetime.datetime.utcnow(), precision)
1883 if date_str in ('now', 'today'):
1884 return today
1885 if date_str == 'yesterday':
1886 return today - datetime.timedelta(days=1)
1887 match = re.match(
1888 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1889 date_str)
1890 if match is not None:
1891 start_time = datetime_from_str(match.group('start'), precision, format)
1892 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1893 unit = match.group('unit')
1894 if unit == 'month' or unit == 'year':
1895 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1896 unit = 'day'
1897 else:
1898 if unit == 'week':
1899 unit = 'day'
1900 time *= 7
1901 delta = datetime.timedelta(**{unit + 's': time})
1902 new_date = start_time + delta
1903 if auto_precision:
1904 return datetime_round(new_date, unit)
1905 return new_date
1906
1907 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1908
1909
1910 def date_from_str(date_str, format='%Y%m%d', strict=False):
1911 R"""
1912 Return a date object from a string using datetime_from_str
1913
1914 @param strict Restrict allowed patterns to "YYYYMMDD" and
1915 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1916 """
1917 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1918 raise ValueError(f'Invalid date format "{date_str}"')
1919 return datetime_from_str(date_str, precision='microsecond', format=format).date()
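

# A small usage sketch (`_example_date_from_str` is hypothetical, not part of
# the API); the first assertion assumes the date does not change between the
# two calls:
def _example_date_from_str():
    assert date_from_str('now-1day') == date_from_str('yesterday')
    try:  # strict mode rejects anything but YYYYMMDD and relative "-" offsets
        date_from_str('now+1day', strict=True)
        raise AssertionError('strict mode should have rejected "+"')
    except ValueError:
        pass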
1920
1921
1922 def datetime_add_months(dt, months):
1923 """Increment/Decrement a datetime object by months."""
1924 month = dt.month + months - 1
1925 year = dt.year + month // 12
1926 month = month % 12 + 1
1927 day = min(dt.day, calendar.monthrange(year, month)[1])
1928 return dt.replace(year, month, day)
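

# Illustrative behaviour (`_example_datetime_add_months` is a hypothetical
# name, not part of the API); uses the module-level datetime import:
def _example_datetime_add_months():
    # The day is clamped to the length of the target month
    assert datetime_add_months(datetime.datetime(2020, 1, 31), 1) == datetime.datetime(2020, 2, 29)
    # Negative values decrement across year boundaries
    assert datetime_add_months(datetime.datetime(2020, 1, 15), -2) == datetime.datetime(2019, 11, 15)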
1929
1930
1931 def datetime_round(dt, precision='day'):
1932 """
1933 Round a datetime object's time to a specific precision
1934 """
1935 if precision == 'microsecond':
1936 return dt
1937
1938 unit_seconds = {
1939 'day': 86400,
1940 'hour': 3600,
1941 'minute': 60,
1942 'second': 1,
1943 }
1944 roundto = lambda x, n: ((x + n / 2) // n) * n
1945 timestamp = calendar.timegm(dt.timetuple())
1946 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1947
1948
1949 def hyphenate_date(date_str):
1950 """
1951 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1952 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1953 if match is not None:
1954 return '-'.join(match.groups())
1955 else:
1956 return date_str
1957
1958
1959 class DateRange:
1960 """Represents a time interval between two dates"""
1961
1962 def __init__(self, start=None, end=None):
1963 """start and end must be strings in the format accepted by date"""
1964 if start is not None:
1965 self.start = date_from_str(start, strict=True)
1966 else:
1967 self.start = datetime.datetime.min.date()
1968 if end is not None:
1969 self.end = date_from_str(end, strict=True)
1970 else:
1971 self.end = datetime.datetime.max.date()
1972 if self.start > self.end:
1973 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
1974
1975 @classmethod
1976 def day(cls, day):
1977 """Returns a range that only contains the given day"""
1978 return cls(day, day)
1979
1980 def __contains__(self, date):
1981 """Check if the date is in the range"""
1982 if not isinstance(date, datetime.date):
1983 date = date_from_str(date)
1984 return self.start <= date <= self.end
1985
1986 def __str__(self):
1987 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1988
1989 def __eq__(self, other):
1990 return (isinstance(other, DateRange)
1991 and self.start == other.start and self.end == other.end)
1992
1993
1994 def platform_name():
1995 """ Returns the platform name as a str """
1996 deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
1997 return platform.platform()
1998
1999
2000 @functools.cache
2001 def system_identifier():
2002 python_implementation = platform.python_implementation()
2003 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2004 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
2005 libc_ver = []
2006 with contextlib.suppress(OSError): # We may not have access to the executable
2007 libc_ver = platform.libc_ver()
2008
2009 return 'Python %s (%s %s) - %s (%s%s)' % (
2010 platform.python_version(),
2011 python_implementation,
2012 platform.architecture()[0],
2013 platform.platform(),
2014 ssl.OPENSSL_VERSION,
2015 format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
2016 )
2017
2018
2019 @functools.cache
2020 def get_windows_version():
2021 ''' Get the Windows version. Returns () if not running on Windows '''
2022 if compat_os_name == 'nt':
2023 return version_tuple(platform.win32_ver()[1])
2024 else:
2025 return ()
2026
2027
2028 def write_string(s, out=None, encoding=None):
2029 assert isinstance(s, str)
2030 out = out or sys.stderr
2031
2032 if compat_os_name == 'nt' and supports_terminal_sequences(out):
2033 s = re.sub(r'([\r\n]+)', r' \1', s)
2034
2035 enc, buffer = None, out
2036 if 'b' in getattr(out, 'mode', ''):
2037 enc = encoding or preferredencoding()
2038 elif hasattr(out, 'buffer'):
2039 buffer = out.buffer
2040 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
2041
2042 buffer.write(s.encode(enc, 'ignore') if enc else s)
2043 out.flush()
2044
2045
2046 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
2047 from . import _IN_CLI
2048 if _IN_CLI:
2049 if msg in deprecation_warning._cache:
2050 return
2051 deprecation_warning._cache.add(msg)
2052 if printer:
2053 return printer(f'{msg}{bug_reports_message()}', **kwargs)
2054 return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
2055 else:
2056 import warnings
2057 warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
2058
2059
2060 deprecation_warning._cache = set()
2061
2062
2063 def bytes_to_intlist(bs):
2064 if not bs:
2065 return []
2066 if isinstance(bs[0], int): # Python 3
2067 return list(bs)
2068 else:
2069 return [ord(c) for c in bs]
2070
2071
2072 def intlist_to_bytes(xs):
2073 if not xs:
2074 return b''
2075 return struct.pack('%dB' % len(xs), *xs)
2076
2077
2078 class LockingUnsupportedError(OSError):
2079 msg = 'File locking is not supported'
2080
2081 def __init__(self):
2082 super().__init__(self.msg)
2083
2084
2085 # Cross-platform file locking
2086 if sys.platform == 'win32':
2087 import ctypes
2088 import ctypes.wintypes
2089 import msvcrt
2090
2091 class OVERLAPPED(ctypes.Structure):
2092 _fields_ = [
2093 ('Internal', ctypes.wintypes.LPVOID),
2094 ('InternalHigh', ctypes.wintypes.LPVOID),
2095 ('Offset', ctypes.wintypes.DWORD),
2096 ('OffsetHigh', ctypes.wintypes.DWORD),
2097 ('hEvent', ctypes.wintypes.HANDLE),
2098 ]
2099
2100 kernel32 = ctypes.windll.kernel32
2101 LockFileEx = kernel32.LockFileEx
2102 LockFileEx.argtypes = [
2103 ctypes.wintypes.HANDLE, # hFile
2104 ctypes.wintypes.DWORD, # dwFlags
2105 ctypes.wintypes.DWORD, # dwReserved
2106 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2107 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2108 ctypes.POINTER(OVERLAPPED) # Overlapped
2109 ]
2110 LockFileEx.restype = ctypes.wintypes.BOOL
2111 UnlockFileEx = kernel32.UnlockFileEx
2112 UnlockFileEx.argtypes = [
2113 ctypes.wintypes.HANDLE, # hFile
2114 ctypes.wintypes.DWORD, # dwReserved
2115 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2116 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2117 ctypes.POINTER(OVERLAPPED) # Overlapped
2118 ]
2119 UnlockFileEx.restype = ctypes.wintypes.BOOL
2120 whole_low = 0xffffffff
2121 whole_high = 0x7fffffff
2122
2123 def _lock_file(f, exclusive, block):
2124 overlapped = OVERLAPPED()
2125 overlapped.Offset = 0
2126 overlapped.OffsetHigh = 0
2127 overlapped.hEvent = 0
2128 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2129
2130 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2131 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2132 0, whole_low, whole_high, f._lock_file_overlapped_p):
2133 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2134 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2135
2136 def _unlock_file(f):
2137 assert f._lock_file_overlapped_p
2138 handle = msvcrt.get_osfhandle(f.fileno())
2139 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2140 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2141
2142 else:
2143 try:
2144 import fcntl
2145
2146 def _lock_file(f, exclusive, block):
2147 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2148 if not block:
2149 flags |= fcntl.LOCK_NB
2150 try:
2151 fcntl.flock(f, flags)
2152 except BlockingIOError:
2153 raise
2154 except OSError: # AOSP does not have flock()
2155 fcntl.lockf(f, flags)
2156
2157 def _unlock_file(f):
2158 try:
2159 fcntl.flock(f, fcntl.LOCK_UN)
2160 except OSError:
2161 fcntl.lockf(f, fcntl.LOCK_UN)
2162
2163 except ImportError:
2164
2165 def _lock_file(f, exclusive, block):
2166 raise LockingUnsupportedError()
2167
2168 def _unlock_file(f):
2169 raise LockingUnsupportedError()
2170
2171
2172 class locked_file:
2173 locked = False
2174
2175 def __init__(self, filename, mode, block=True, encoding=None):
2176 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2177 raise NotImplementedError(mode)
2178 self.mode, self.block = mode, block
2179
2180 writable = any(f in mode for f in 'wax+')
2181 readable = any(f in mode for f in 'r+')
2182 flags = functools.reduce(operator.ior, (
2183 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2184 getattr(os, 'O_BINARY', 0), # Windows only
2185 getattr(os, 'O_NOINHERIT', 0), # Windows only
2186 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2187 os.O_APPEND if 'a' in mode else 0,
2188 os.O_EXCL if 'x' in mode else 0,
2189 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2190 ))
2191
2192 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2193
2194 def __enter__(self):
2195 exclusive = 'r' not in self.mode
2196 try:
2197 _lock_file(self.f, exclusive, self.block)
2198 self.locked = True
2199 except OSError:
2200 self.f.close()
2201 raise
2202 if 'w' in self.mode:
2203 try:
2204 self.f.truncate()
2205 except OSError as e:
2206 if e.errno not in (
2207 errno.ESPIPE, # Illegal seek - expected for FIFO
2208 errno.EINVAL, # Invalid argument - expected for /dev/null
2209 ):
2210 raise
2211 return self
2212
2213 def unlock(self):
2214 if not self.locked:
2215 return
2216 try:
2217 _unlock_file(self.f)
2218 finally:
2219 self.locked = False
2220
2221 def __exit__(self, *_):
2222 try:
2223 self.unlock()
2224 finally:
2225 self.f.close()
2226
2227 open = __enter__
2228 close = __exit__
2229
2230 def __getattr__(self, attr):
2231 return getattr(self.f, attr)
2232
2233 def __iter__(self):
2234 return iter(self.f)
2235
2236
2237 @functools.cache
2238 def get_filesystem_encoding():
2239 encoding = sys.getfilesystemencoding()
2240 return encoding if encoding is not None else 'utf-8'
2241
2242
2243 def shell_quote(args):
2244 quoted_args = []
2245 encoding = get_filesystem_encoding()
2246 for a in args:
2247 if isinstance(a, bytes):
2248 # We may get a filename encoded with 'encodeFilename'
2249 a = a.decode(encoding)
2250 quoted_args.append(compat_shlex_quote(a))
2251 return ' '.join(quoted_args)
2252
2253
2254 def smuggle_url(url, data):
2255 """ Pass additional data in a URL for internal use. """
2256
2257 url, idata = unsmuggle_url(url, {})
2258 data.update(idata)
2259 sdata = urllib.parse.urlencode(
2260 {'__youtubedl_smuggle': json.dumps(data)})
2261 return url + '#' + sdata
2262
2263
2264 def unsmuggle_url(smug_url, default=None):
2265 if '#__youtubedl_smuggle' not in smug_url:
2266 return smug_url, default
2267 url, _, sdata = smug_url.rpartition('#')
2268 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2269 data = json.loads(jsond)
2270 return url, data
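

# A round-trip sketch (`_example_smuggle_url` is a hypothetical name, not part
# of the API):
def _example_smuggle_url():
    url = smuggle_url('https://example.com/video', {'referrer': 'https://example.com/'})
    assert unsmuggle_url(url) == ('https://example.com/video', {'referrer': 'https://example.com/'})
    # URLs without smuggled data are returned together with the given default
    assert unsmuggle_url('https://example.com/plain') == ('https://example.com/plain', None)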
2271
2272
2273 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2274 """ Formats numbers with decimal sufixes like K, M, etc """
2275 num, factor = float_or_none(num), float(factor)
2276 if num is None or num < 0:
2277 return None
2278 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2279 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2280 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2281 if factor == 1024:
2282 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2283 converted = num / (factor ** exponent)
2284 return fmt % (converted, suffix)
2285
2286
2287 def format_bytes(bytes):
2288 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
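

# Illustrative behaviour (`_example_format_decimal_suffix` is a hypothetical
# name, not part of the API):
def _example_format_decimal_suffix():
    assert format_decimal_suffix(123456, '%.1f%s') == '123.5k'
    # factor=1024 switches to the binary ("Ki", "Mi", ...) suffixes
    assert format_bytes(1024 ** 2) == '1.00MiB'
    assert format_bytes(None) == 'N/A'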
2289
2290
2291 def lookup_unit_table(unit_table, s):
2292 units_re = '|'.join(re.escape(u) for u in unit_table)
2293 m = re.match(
2294 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2295 if not m:
2296 return None
2297 num_str = m.group('num').replace(',', '.')
2298 mult = unit_table[m.group('unit')]
2299 return int(float(num_str) * mult)
2300
2301
2302 def parse_filesize(s):
2303 if s is None:
2304 return None
2305
2306 # The lower-case forms are of course incorrect and unofficial,
2307 # but we support those too
2308 _UNIT_TABLE = {
2309 'B': 1,
2310 'b': 1,
2311 'bytes': 1,
2312 'KiB': 1024,
2313 'KB': 1000,
2314 'kB': 1024,
2315 'Kb': 1000,
2316 'kb': 1000,
2317 'kilobytes': 1000,
2318 'kibibytes': 1024,
2319 'MiB': 1024 ** 2,
2320 'MB': 1000 ** 2,
2321 'mB': 1024 ** 2,
2322 'Mb': 1000 ** 2,
2323 'mb': 1000 ** 2,
2324 'megabytes': 1000 ** 2,
2325 'mebibytes': 1024 ** 2,
2326 'GiB': 1024 ** 3,
2327 'GB': 1000 ** 3,
2328 'gB': 1024 ** 3,
2329 'Gb': 1000 ** 3,
2330 'gb': 1000 ** 3,
2331 'gigabytes': 1000 ** 3,
2332 'gibibytes': 1024 ** 3,
2333 'TiB': 1024 ** 4,
2334 'TB': 1000 ** 4,
2335 'tB': 1024 ** 4,
2336 'Tb': 1000 ** 4,
2337 'tb': 1000 ** 4,
2338 'terabytes': 1000 ** 4,
2339 'tebibytes': 1024 ** 4,
2340 'PiB': 1024 ** 5,
2341 'PB': 1000 ** 5,
2342 'pB': 1024 ** 5,
2343 'Pb': 1000 ** 5,
2344 'pb': 1000 ** 5,
2345 'petabytes': 1000 ** 5,
2346 'pebibytes': 1024 ** 5,
2347 'EiB': 1024 ** 6,
2348 'EB': 1000 ** 6,
2349 'eB': 1024 ** 6,
2350 'Eb': 1000 ** 6,
2351 'eb': 1000 ** 6,
2352 'exabytes': 1000 ** 6,
2353 'exbibytes': 1024 ** 6,
2354 'ZiB': 1024 ** 7,
2355 'ZB': 1000 ** 7,
2356 'zB': 1024 ** 7,
2357 'Zb': 1000 ** 7,
2358 'zb': 1000 ** 7,
2359 'zettabytes': 1000 ** 7,
2360 'zebibytes': 1024 ** 7,
2361 'YiB': 1024 ** 8,
2362 'YB': 1000 ** 8,
2363 'yB': 1024 ** 8,
2364 'Yb': 1000 ** 8,
2365 'yb': 1000 ** 8,
2366 'yottabytes': 1000 ** 8,
2367 'yobibytes': 1024 ** 8,
2368 }
2369
2370 return lookup_unit_table(_UNIT_TABLE, s)
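

# A small usage sketch (`_example_parse_filesize` is hypothetical, not part of
# the API):
def _example_parse_filesize():
    assert parse_filesize('2 MiB') == 2097152
    assert parse_filesize('5 GB') == 5000000000
    # A comma is accepted as the decimal separator
    assert parse_filesize('1,24 KB') == 1240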
2371
2372
2373 def parse_count(s):
2374 if s is None:
2375 return None
2376
2377 s = re.sub(r'^[^\d]+\s', '', s).strip()
2378
2379 if re.match(r'^[\d,.]+$', s):
2380 return str_to_int(s)
2381
2382 _UNIT_TABLE = {
2383 'k': 1000,
2384 'K': 1000,
2385 'm': 1000 ** 2,
2386 'M': 1000 ** 2,
2387 'kk': 1000 ** 2,
2388 'KK': 1000 ** 2,
2389 'b': 1000 ** 3,
2390 'B': 1000 ** 3,
2391 }
2392
2393 ret = lookup_unit_table(_UNIT_TABLE, s)
2394 if ret is not None:
2395 return ret
2396
2397 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2398 if mobj:
2399 return str_to_int(mobj.group(1))
2400
2401
2402 def parse_resolution(s, *, lenient=False):
2403 if s is None:
2404 return {}
2405
2406 if lenient:
2407 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2408 else:
2409 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2410 if mobj:
2411 return {
2412 'width': int(mobj.group('w')),
2413 'height': int(mobj.group('h')),
2414 }
2415
2416 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2417 if mobj:
2418 return {'height': int(mobj.group(1))}
2419
2420 mobj = re.search(r'\b([48])[kK]\b', s)
2421 if mobj:
2422 return {'height': int(mobj.group(1)) * 540}
2423
2424 return {}
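

# Illustrative behaviour (`_example_parse_resolution` is a hypothetical name,
# not part of the API):
def _example_parse_resolution():
    assert parse_resolution('1920x1080') == {'width': 1920, 'height': 1080}
    assert parse_resolution('720p') == {'height': 720}
    assert parse_resolution('4k') == {'height': 2160}
    # Digits glued to letters are rejected unless lenient=True is passed
    assert parse_resolution('ep1x2') == {}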
2425
2426
2427 def parse_bitrate(s):
2428 if not isinstance(s, str):
2429 return
2430 mobj = re.search(r'\b(\d+)\s*kbps', s)
2431 if mobj:
2432 return int(mobj.group(1))
2433
2434
2435 def month_by_name(name, lang='en'):
2436 """ Return the number of a month by (locale-independently) English name """
2437
2438 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2439
2440 try:
2441 return month_names.index(name) + 1
2442 except ValueError:
2443 return None
2444
2445
2446 def month_by_abbreviation(abbrev):
2447 """ Return the number of a month by (locale-independently) English
2448 abbreviations """
2449
2450 try:
2451 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2452 except ValueError:
2453 return None
2454
2455
2456 def fix_xml_ampersands(xml_str):
2457 """Replace all the '&' by '&amp;' in XML"""
2458 return re.sub(
2459 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2460 '&amp;',
2461 xml_str)
2462
2463
2464 def setproctitle(title):
2465 assert isinstance(title, str)
2466
2467 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2468 try:
2469 import ctypes
2470 except ImportError:
2471 return
2472
2473 try:
2474 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2475 except OSError:
2476 return
2477 except TypeError:
2478 # LoadLibrary in Windows Python 2.7.13 only expects
2479 # a bytestring, but since unicode_literals turns
2480 # every string into a unicode string, it fails.
2481 return
2482 title_bytes = title.encode()
2483 buf = ctypes.create_string_buffer(len(title_bytes))
2484 buf.value = title_bytes
2485 try:
2486 libc.prctl(15, buf, 0, 0, 0)
2487 except AttributeError:
2488 return # Strange libc, just skip this
2489
2490
2491 def remove_start(s, start):
2492 return s[len(start):] if s is not None and s.startswith(start) else s
2493
2494
2495 def remove_end(s, end):
2496 return s[:-len(end)] if s is not None and s.endswith(end) else s
2497
2498
2499 def remove_quotes(s):
2500 if s is None or len(s) < 2:
2501 return s
2502 for quote in ('"', "'", ):
2503 if s[0] == quote and s[-1] == quote:
2504 return s[1:-1]
2505 return s
2506
2507
2508 def get_domain(url):
2509 """
2510 This implementation is inconsistent, but is kept for compatibility.
2511 Use this only for "webpage_url_domain"
2512 """
2513 return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2514
2515
2516 def url_basename(url):
2517 path = urllib.parse.urlparse(url).path
2518 return path.strip('/').split('/')[-1]
2519
2520
2521 def base_url(url):
2522 return re.match(r'https?://[^?#]+/', url).group()
2523
2524
2525 def urljoin(base, path):
2526 if isinstance(path, bytes):
2527 path = path.decode()
2528 if not isinstance(path, str) or not path:
2529 return None
2530 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2531 return path
2532 if isinstance(base, bytes):
2533 base = base.decode()
2534 if not isinstance(base, str) or not re.match(
2535 r'^(?:https?:)?//', base):
2536 return None
2537 return urllib.parse.urljoin(base, path)
2538
2539
2540 class HEADRequest(urllib.request.Request):
2541 def get_method(self):
2542 return 'HEAD'
2543
2544
2545 class PUTRequest(urllib.request.Request):
2546 def get_method(self):
2547 return 'PUT'
2548
2549
2550 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2551 if get_attr and v is not None:
2552 v = getattr(v, get_attr, None)
2553 try:
2554 return int(v) * invscale // scale
2555 except (ValueError, TypeError, OverflowError):
2556 return default
2557
2558
2559 def str_or_none(v, default=None):
2560 return default if v is None else str(v)
2561
2562
2563 def str_to_int(int_str):
2564 """ A more relaxed version of int_or_none """
2565 if isinstance(int_str, int):
2566 return int_str
2567 elif isinstance(int_str, str):
2568 int_str = re.sub(r'[,\.\+]', '', int_str)
2569 return int_or_none(int_str)
2570
2571
2572 def float_or_none(v, scale=1, invscale=1, default=None):
2573 if v is None:
2574 return default
2575 try:
2576 return float(v) * invscale / scale
2577 except (ValueError, TypeError):
2578 return default
2579
2580
2581 def bool_or_none(v, default=None):
2582 return v if isinstance(v, bool) else default
2583
2584
2585 def strip_or_none(v, default=None):
2586 return v.strip() if isinstance(v, str) else default
2587
2588
2589 def url_or_none(url):
2590 if not url or not isinstance(url, str):
2591 return None
2592 url = url.strip()
2593 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2594
2595
2596 def request_to_url(req):
2597 if isinstance(req, urllib.request.Request):
2598 return req.get_full_url()
2599 else:
2600 return req
2601
2602
2603 def strftime_or_none(timestamp, date_format, default=None):
2604 datetime_object = None
2605 try:
2606 if isinstance(timestamp, (int, float)): # unix timestamp
2607 # Using naive datetime here can break timestamp() in Windows
2608 # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
2609 datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
2610 elif isinstance(timestamp, str): # assume YYYYMMDD
2611 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2612 date_format = re.sub( # Support %s on windows
2613 r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
2614 return datetime_object.strftime(date_format)
2615 except (ValueError, TypeError, AttributeError):
2616 return default
2617
2618
2619 def parse_duration(s):
2620 if not isinstance(s, str):
2621 return None
2622 s = s.strip()
2623 if not s:
2624 return None
2625
2626 days, hours, mins, secs, ms = [None] * 5
2627 m = re.match(r'''(?x)
2628 (?P<before_secs>
2629 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2630 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2631 (?P<ms>[.:][0-9]+)?Z?$
2632 ''', s)
2633 if m:
2634 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2635 else:
2636 m = re.match(
2637 r'''(?ix)(?:P?
2638 (?:
2639 [0-9]+\s*y(?:ears?)?,?\s*
2640 )?
2641 (?:
2642 [0-9]+\s*m(?:onths?)?,?\s*
2643 )?
2644 (?:
2645 [0-9]+\s*w(?:eeks?)?,?\s*
2646 )?
2647 (?:
2648 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2649 )?
2650 T)?
2651 (?:
2652 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2653 )?
2654 (?:
2655 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2656 )?
2657 (?:
2658 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2659 )?Z?$''', s)
2660 if m:
2661 days, hours, mins, secs, ms = m.groups()
2662 else:
2663 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2664 if m:
2665 hours, mins = m.groups()
2666 else:
2667 return None
2668
2669 if ms:
2670 ms = ms.replace(':', '.')
2671 return sum(float(part or 0) * mult for part, mult in (
2672 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
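

# A small usage sketch (`_example_parse_duration` is hypothetical, not part of
# the API); all three regexes above are exercised:
def _example_parse_duration():
    assert parse_duration('1:23:45') == 5025       # [[DD:]HH:]MM:SS
    assert parse_duration('PT1H30M') == 5400       # ISO 8601-like
    assert parse_duration('2.5 hours') == 9000     # free-form fallback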
2673
2674
2675 def prepend_extension(filename, ext, expected_real_ext=None):
2676 name, real_ext = os.path.splitext(filename)
2677 return (
2678 f'{name}.{ext}{real_ext}'
2679 if not expected_real_ext or real_ext[1:] == expected_real_ext
2680 else f'{filename}.{ext}')
2681
2682
2683 def replace_extension(filename, ext, expected_real_ext=None):
2684 name, real_ext = os.path.splitext(filename)
2685 return '{}.{}'.format(
2686 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2687 ext)
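

# Illustrative behaviour (`_example_extension_helpers` is a hypothetical name,
# not part of the API):
def _example_extension_helpers():
    assert prepend_extension('video.mp4', 'temp') == 'video.temp.mp4'
    # When the real extension is not the expected one, the new one is appended
    assert prepend_extension('video.unknown', 'temp', expected_real_ext='mp4') == 'video.unknown.temp'
    assert replace_extension('video.mp4', 'webm') == 'video.webm'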
2688
2689
2690 def check_executable(exe, args=[]):
2691 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2692 args can be a list of arguments for a short output (like -version) """
2693 try:
2694 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2695 except OSError:
2696 return False
2697 return exe
2698
2699
2700 def _get_exe_version_output(exe, args, *, to_screen=None):
2701 if to_screen:
2702 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2703 try:
2704 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2705 # SIGTTOU if yt-dlp is run in the background.
2706 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2707 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2708 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2709 except OSError:
2710 return False
2711 return stdout
2712
2713
2714 def detect_exe_version(output, version_re=None, unrecognized='present'):
2715 assert isinstance(output, str)
2716 if version_re is None:
2717 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2718 m = re.search(version_re, output)
2719 if m:
2720 return m.group(1)
2721 else:
2722 return unrecognized
2723
2724
2725 def get_exe_version(exe, args=['--version'],
2726 version_re=None, unrecognized='present'):
2727 """ Returns the version of the specified executable,
2728 or False if the executable is not present """
2729 out = _get_exe_version_output(exe, args)
2730 return detect_exe_version(out, version_re, unrecognized) if out else False
2731
2732
2733 def frange(start=0, stop=None, step=1):
2734 """Float range"""
2735 if stop is None:
2736 start, stop = 0, start
2737 sign = [-1, 1][step > 0] if step else 0
2738 while sign * start < sign * stop:
2739 yield start
2740 start += step
2741
2742
2743 class LazyList(collections.abc.Sequence):
2744 """Lazy immutable list from an iterable
2745 Note that slices of a LazyList are lists and not LazyList"""
2746
2747 class IndexError(IndexError):
2748 pass
2749
2750 def __init__(self, iterable, *, reverse=False, _cache=None):
2751 self._iterable = iter(iterable)
2752 self._cache = [] if _cache is None else _cache
2753 self._reversed = reverse
2754
2755 def __iter__(self):
2756 if self._reversed:
2757 # We need to consume the entire iterable to iterate in reverse
2758 yield from self.exhaust()
2759 return
2760 yield from self._cache
2761 for item in self._iterable:
2762 self._cache.append(item)
2763 yield item
2764
2765 def _exhaust(self):
2766 self._cache.extend(self._iterable)
2767 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2768 return self._cache
2769
2770 def exhaust(self):
2771 """Evaluate the entire iterable"""
2772 return self._exhaust()[::-1 if self._reversed else 1]
2773
2774 @staticmethod
2775 def _reverse_index(x):
2776 return None if x is None else ~x
2777
2778 def __getitem__(self, idx):
2779 if isinstance(idx, slice):
2780 if self._reversed:
2781 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2782 start, stop, step = idx.start, idx.stop, idx.step or 1
2783 elif isinstance(idx, int):
2784 if self._reversed:
2785 idx = self._reverse_index(idx)
2786 start, stop, step = idx, idx, 0
2787 else:
2788 raise TypeError('indices must be integers or slices')
2789 if ((start or 0) < 0 or (stop or 0) < 0
2790 or (start is None and step < 0)
2791 or (stop is None and step > 0)):
2792 # We need to consume the entire iterable to be able to slice from the end
2793 # Obviously, never use this with infinite iterables
2794 self._exhaust()
2795 try:
2796 return self._cache[idx]
2797 except IndexError as e:
2798 raise self.IndexError(e) from e
2799 n = max(start or 0, stop or 0) - len(self._cache) + 1
2800 if n > 0:
2801 self._cache.extend(itertools.islice(self._iterable, n))
2802 try:
2803 return self._cache[idx]
2804 except IndexError as e:
2805 raise self.IndexError(e) from e
2806
2807 def __bool__(self):
2808 try:
2809 self[-1] if self._reversed else self[0]
2810 except self.IndexError:
2811 return False
2812 return True
2813
2814 def __len__(self):
2815 self._exhaust()
2816 return len(self._cache)
2817
2818 def __reversed__(self):
2819 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2820
2821 def __copy__(self):
2822 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2823
2824 def __repr__(self):
2825 # repr and str should mimic a list. So we exhaust the iterable
2826 return repr(self.exhaust())
2827
2828 def __str__(self):
2829 return repr(self.exhaust())
2830
2831
2832 class PagedList:
2833
2834 class IndexError(IndexError):
2835 pass
2836
2837 def __len__(self):
2838 # This is only useful for tests
2839 return len(self.getslice())
2840
2841 def __init__(self, pagefunc, pagesize, use_cache=True):
2842 self._pagefunc = pagefunc
2843 self._pagesize = pagesize
2844 self._pagecount = float('inf')
2845 self._use_cache = use_cache
2846 self._cache = {}
2847
2848 def getpage(self, pagenum):
2849 page_results = self._cache.get(pagenum)
2850 if page_results is None:
2851 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2852 if self._use_cache:
2853 self._cache[pagenum] = page_results
2854 return page_results
2855
2856 def getslice(self, start=0, end=None):
2857 return list(self._getslice(start, end))
2858
2859 def _getslice(self, start, end):
2860 raise NotImplementedError('This method must be implemented by subclasses')
2861
2862 def __getitem__(self, idx):
2863 assert self._use_cache, 'Indexing PagedList requires cache'
2864 if not isinstance(idx, int) or idx < 0:
2865 raise TypeError('indices must be non-negative integers')
2866 entries = self.getslice(idx, idx + 1)
2867 if not entries:
2868 raise self.IndexError()
2869 return entries[0]
2870
2871
2872 class OnDemandPagedList(PagedList):
2873 """Download pages until a page with less than maximum results"""
2874
2875 def _getslice(self, start, end):
2876 for pagenum in itertools.count(start // self._pagesize):
2877 firstid = pagenum * self._pagesize
2878 nextfirstid = pagenum * self._pagesize + self._pagesize
2879 if start >= nextfirstid:
2880 continue
2881
2882 startv = (
2883 start % self._pagesize
2884 if firstid <= start < nextfirstid
2885 else 0)
2886 endv = (
2887 ((end - 1) % self._pagesize) + 1
2888 if (end is not None and firstid <= end <= nextfirstid)
2889 else None)
2890
2891 try:
2892 page_results = self.getpage(pagenum)
2893 except Exception:
2894 self._pagecount = pagenum - 1
2895 raise
2896 if startv != 0 or endv is not None:
2897 page_results = page_results[startv:endv]
2898 yield from page_results
2899
2900 # A little optimization - if the current page is not "full", i.e. does
2901 # not contain page_size videos, then we can assume that this page
2902 # is the last one - there are no more ids on further pages,
2903 # so there is no need to query again.
2904 if len(page_results) + startv < self._pagesize:
2905 break
2906
2907 # If we got the whole page, but the next page is not interesting,
2908 # break out early as well
2909 if end == nextfirstid:
2910 break
2911
2912
2913 class InAdvancePagedList(PagedList):
2914 """PagedList with total number of pages known in advance"""
2915
2916 def __init__(self, pagefunc, pagecount, pagesize):
2917 PagedList.__init__(self, pagefunc, pagesize, True)
2918 self._pagecount = pagecount
2919
2920 def _getslice(self, start, end):
2921 start_page = start // self._pagesize
2922 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2923 skip_elems = start - start_page * self._pagesize
2924 only_more = None if end is None else end - start
2925 for pagenum in range(start_page, end_page):
2926 page_results = self.getpage(pagenum)
2927 if skip_elems:
2928 page_results = page_results[skip_elems:]
2929 skip_elems = None
2930 if only_more is not None:
2931 if len(page_results) < only_more:
2932 only_more -= len(page_results)
2933 else:
2934 yield from page_results[:only_more]
2935 break
2936 yield from page_results
2937
2938
2939 class PlaylistEntries:
2940 MissingEntry = object()
2941 is_exhausted = False
2942
2943 def __init__(self, ydl, info_dict):
2944 self.ydl = ydl
2945
2946 # _entries must be assigned now since info_dict can change during iteration
2947 entries = info_dict.get('entries')
2948 if entries is None:
2949 raise EntryNotInPlaylist('There are no entries')
2950 elif isinstance(entries, list):
2951 self.is_exhausted = True
2952
2953 requested_entries = info_dict.get('requested_entries')
2954 self.is_incomplete = bool(requested_entries)
2955 if self.is_incomplete:
2956 assert self.is_exhausted
2957 self._entries = [self.MissingEntry] * max(requested_entries)
2958 for i, entry in zip(requested_entries, entries):
2959 self._entries[i - 1] = entry
2960 elif isinstance(entries, (list, PagedList, LazyList)):
2961 self._entries = entries
2962 else:
2963 self._entries = LazyList(entries)
2964
2965 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2966 (?P<start>[+-]?\d+)?
2967 (?P<range>[:-]
2968 (?P<end>[+-]?\d+|inf(?:inite)?)?
2969 (?::(?P<step>[+-]?\d+))?
2970 )?''')
2971
2972 @classmethod
2973 def parse_playlist_items(cls, string):
2974 for segment in string.split(','):
2975 if not segment:
2976 raise ValueError('There are two or more consecutive commas')
2977 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2978 if not mobj:
2979 raise ValueError(f'{segment!r} is not a valid specification')
2980 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2981 if int_or_none(step) == 0:
2982 raise ValueError(f'Step in {segment!r} cannot be zero')
2983 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2984
2985 def get_requested_items(self):
2986 playlist_items = self.ydl.params.get('playlist_items')
2987 playlist_start = self.ydl.params.get('playliststart', 1)
2988 playlist_end = self.ydl.params.get('playlistend')
2989 # For backwards compatibility, interpret -1 as whole list
2990 if playlist_end in (-1, None):
2991 playlist_end = ''
2992 if not playlist_items:
2993 playlist_items = f'{playlist_start}:{playlist_end}'
2994 elif playlist_start != 1 or playlist_end:
2995 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2996
2997 for index in self.parse_playlist_items(playlist_items):
2998 for i, entry in self[index]:
2999 yield i, entry
3000 if not entry:
3001 continue
3002 try:
3003 # TODO: Add auto-generated fields
3004 self.ydl._match_entry(entry, incomplete=True, silent=True)
3005 except (ExistingVideoReached, RejectedVideoReached):
3006 return
3007
3008 def get_full_count(self):
3009 if self.is_exhausted and not self.is_incomplete:
3010 return len(self)
3011 elif isinstance(self._entries, InAdvancePagedList):
3012 if self._entries._pagesize == 1:
3013 return self._entries._pagecount
3014
3015 @functools.cached_property
3016 def _getter(self):
3017 if isinstance(self._entries, list):
3018 def get_entry(i):
3019 try:
3020 entry = self._entries[i]
3021 except IndexError:
3022 entry = self.MissingEntry
3023 if not self.is_incomplete:
3024 raise self.IndexError()
3025 if entry is self.MissingEntry:
3026 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
3027 return entry
3028 else:
3029 def get_entry(i):
3030 try:
3031 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
3032 except (LazyList.IndexError, PagedList.IndexError):
3033 raise self.IndexError()
3034 return get_entry
3035
3036 def __getitem__(self, idx):
3037 if isinstance(idx, int):
3038 idx = slice(idx, idx)
3039
3040 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3041 step = 1 if idx.step is None else idx.step
3042 if idx.start is None:
3043 start = 0 if step > 0 else len(self) - 1
3044 else:
3045 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
3046
3047 # NB: Do not call len(self) when idx == [:]
3048 if idx.stop is None:
3049 stop = 0 if step < 0 else float('inf')
3050 else:
3051 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
3052 stop += [-1, 1][step > 0]
3053
3054 for i in frange(start, stop, step):
3055 if i < 0:
3056 continue
3057 try:
3058 entry = self._getter(i)
3059 except self.IndexError:
3060 self.is_exhausted = True
3061 if step > 0:
3062 break
3063 continue
3064 yield i + 1, entry
3065
3066 def __len__(self):
3067 return len(tuple(self[:]))
3068
3069 class IndexError(IndexError):
3070 pass
3071
3072
3073 def uppercase_escape(s):
3074 unicode_escape = codecs.getdecoder('unicode_escape')
3075 return re.sub(
3076 r'\\U[0-9a-fA-F]{8}',
3077 lambda m: unicode_escape(m.group(0))[0],
3078 s)
3079
3080
3081 def lowercase_escape(s):
3082 unicode_escape = codecs.getdecoder('unicode_escape')
3083 return re.sub(
3084 r'\\u[0-9a-fA-F]{4}',
3085 lambda m: unicode_escape(m.group(0))[0],
3086 s)
3087
3088
3089 def escape_rfc3986(s):
3090 """Escape non-ASCII characters as suggested by RFC 3986"""
3091 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3092
3093
3094 def escape_url(url):
3095 """Escape URL as suggested by RFC 3986"""
3096 url_parsed = urllib.parse.urlparse(url)
3097 return url_parsed._replace(
3098 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3099 path=escape_rfc3986(url_parsed.path),
3100 params=escape_rfc3986(url_parsed.params),
3101 query=escape_rfc3986(url_parsed.query),
3102 fragment=escape_rfc3986(url_parsed.fragment)
3103 ).geturl()
3104
3105
3106 def parse_qs(url, **kwargs):
3107 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
3108
3109
3110 def read_batch_urls(batch_fd):
3111 def fixup(url):
3112 if not isinstance(url, str):
3113 url = url.decode('utf-8', 'replace')
3114 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3115 for bom in BOM_UTF8:
3116 if url.startswith(bom):
3117 url = url[len(bom):]
3118 url = url.lstrip()
3119 if not url or url.startswith(('#', ';', ']')):
3120 return False
3121 # "#" cannot be stripped out since it is part of the URI
3122 # However, it can be safely stripped out if it follows a whitespace
3123 return re.split(r'\s#', url, 1)[0].rstrip()
3124
3125 with contextlib.closing(batch_fd) as fd:
3126 return [url for url in map(fixup, fd) if url]
3127
3128
3129 def urlencode_postdata(*args, **kargs):
3130 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3131
3132
3133 def update_url_query(url, query):
3134 if not query:
3135 return url
3136 parsed_url = urllib.parse.urlparse(url)
3137 qs = urllib.parse.parse_qs(parsed_url.query)
3138 qs.update(query)
3139 return urllib.parse.urlunparse(parsed_url._replace(
3140 query=urllib.parse.urlencode(qs, True)))
3141
3142
3143 def update_Request(req, url=None, data=None, headers=None, query=None):
3144 req_headers = req.headers.copy()
3145 req_headers.update(headers or {})
3146 req_data = data or req.data
3147 req_url = update_url_query(url or req.get_full_url(), query)
3148 req_get_method = req.get_method()
3149 if req_get_method == 'HEAD':
3150 req_type = HEADRequest
3151 elif req_get_method == 'PUT':
3152 req_type = PUTRequest
3153 else:
3154 req_type = urllib.request.Request
3155 new_req = req_type(
3156 req_url, data=req_data, headers=req_headers,
3157 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3158 if hasattr(req, 'timeout'):
3159 new_req.timeout = req.timeout
3160 return new_req
3161
3162
3163 def _multipart_encode_impl(data, boundary):
3164 content_type = 'multipart/form-data; boundary=%s' % boundary
3165
3166 out = b''
3167 for k, v in data.items():
3168 out += b'--' + boundary.encode('ascii') + b'\r\n'
3169 if isinstance(k, str):
3170 k = k.encode()
3171 if isinstance(v, str):
3172 v = v.encode()
3173 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3174 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3175 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3176 if boundary.encode('ascii') in content:
3177 raise ValueError('Boundary overlaps with data')
3178 out += content
3179
3180 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3181
3182 return out, content_type
3183
3184
3185 def multipart_encode(data, boundary=None):
3186 '''
3187 Encode a dict to RFC 7578-compliant form-data
3188
3189 data:
3190 A dict where keys and values can be either Unicode or bytes-like
3191 objects.
3192 boundary:
3193 If specified, it must be a Unicode object and is used as the boundary.
3194 Otherwise a random boundary is generated.
3195
3196 Reference: https://tools.ietf.org/html/rfc7578
3197 '''
3198 has_specified_boundary = boundary is not None
3199
3200 while True:
3201 if boundary is None:
3202 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3203
3204 try:
3205 out, content_type = _multipart_encode_impl(data, boundary)
3206 break
3207 except ValueError:
3208 if has_specified_boundary:
3209 raise
3210 boundary = None
3211
3212 return out, content_type
3213
3214
3215 def variadic(x, allowed_types=(str, bytes, dict)):
3216 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
3217
3218
3219 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3220 for val in map(d.get, variadic(key_or_keys)):
3221 if val is not None and (val or not skip_false_values):
3222 return val
3223 return default
3224
3225
3226 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3227 for f in funcs:
3228 try:
3229 val = f(*args, **kwargs)
3230 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3231 pass
3232 else:
3233 if expected_type is None or isinstance(val, expected_type):
3234 return val
3235
3236
3237 def try_get(src, getter, expected_type=None):
3238 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3239
3240
3241 def filter_dict(dct, cndn=lambda _, v: v is not None):
3242 return {k: v for k, v in dct.items() if cndn(k, v)}
3243
3244
3245 def merge_dicts(*dicts):
3246 merged = {}
3247 for a_dict in dicts:
3248 for k, v in a_dict.items():
3249 if (v is not None and k not in merged
3250 or isinstance(v, str) and merged[k] == ''):
3251 merged[k] = v
3252 return merged
3253
3254
3255 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3256 return string if isinstance(string, str) else str(string, encoding, errors)
3257
3258
3259 US_RATINGS = {
3260 'G': 0,
3261 'PG': 10,
3262 'PG-13': 13,
3263 'R': 16,
3264 'NC': 18,
3265 }
3266
3267
3268 TV_PARENTAL_GUIDELINES = {
3269 'TV-Y': 0,
3270 'TV-Y7': 7,
3271 'TV-G': 0,
3272 'TV-PG': 0,
3273 'TV-14': 14,
3274 'TV-MA': 17,
3275 }
3276
3277
3278 def parse_age_limit(s):
3279 # isinstance(False, int) is True. So type() must be used instead
3280 if type(s) is int: # noqa: E721
3281 return s if 0 <= s <= 21 else None
3282 elif not isinstance(s, str):
3283 return None
3284 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3285 if m:
3286 return int(m.group('age'))
3287 s = s.upper()
3288 if s in US_RATINGS:
3289 return US_RATINGS[s]
3290 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3291 if m:
3292 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3293 return None
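

# A small usage sketch (`_example_parse_age_limit` is hypothetical, not part
# of the API):
def _example_parse_age_limit():
    assert parse_age_limit('18+') == 18
    assert parse_age_limit('PG-13') == 13
    assert parse_age_limit('TV-MA') == 17
    assert parse_age_limit(False) is None  # bools are deliberately not ints here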
3294
3295
3296 def strip_jsonp(code):
3297 return re.sub(
3298 r'''(?sx)^
3299 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3300 (?:\s*&&\s*(?P=func_name))?
3301 \s*\(\s*(?P<callback_data>.*)\);?
3302 \s*?(?://[^\n]*)*$''',
3303 r'\g<callback_data>', code)
3304
3305
3306 def js_to_json(code, vars={}, *, strict=False):
3307 # vars is a dict of var, val pairs to substitute
3308 STRING_QUOTES = '\'"'
3309 STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
3310 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3311 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3312 INTEGER_TABLE = (
3313 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3314 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3315 )
3316
3317 def process_escape(match):
3318 JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
3319 escape = match.group(1) or match.group(2)
3320
3321 return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
3322 else R'\u00' if escape == 'x'
3323 else '' if escape == '\n'
3324 else escape)
3325
3326 def fix_kv(m):
3327 v = m.group(0)
3328 if v in ('true', 'false', 'null'):
3329 return v
3330 elif v in ('undefined', 'void 0'):
3331 return 'null'
3332 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3333 return ''
3334
3335 if v[0] in STRING_QUOTES:
3336 escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
3337 return f'"{escaped}"'
3338
3339 for regex, base in INTEGER_TABLE:
3340 im = re.match(regex, v)
3341 if im:
3342 i = int(im.group(1), base)
3343 return f'"{i}":' if v.endswith(':') else str(i)
3344
3345 if v in vars:
3346 return json.dumps(vars[v])
3347
3348 if not strict:
3349 return f'"{v}"'
3350
3351 raise ValueError(f'Unknown value: {v}')
3352
3353 def create_map(mobj):
3354 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3355
3356 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3357 if not strict:
3358 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3359 code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
3360
3361 return re.sub(rf'''(?sx)
3362 {STRING_RE}|
3363 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
3364 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3365 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
3366 [0-9]+(?={SKIP_RE}:)|
3367 !+
3368 ''', fix_kv, code)
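

# A small usage sketch (`_example_js_to_json` is hypothetical, not part of the
# API):
def _example_js_to_json():
    assert js_to_json("{'a': true}") == '{"a": true}'
    assert js_to_json('{a: 0xff}') == '{"a": 255}'  # bare keys, hex literals
    assert js_to_json('["a",]') == '["a"]'  # trailing commas are dropped
    assert js_to_json('{a: undefined}') == '{"a": null}'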
3369
3370
3371 def qualities(quality_ids):
3372 """ Get a numeric quality value out of a list of possible values """
3373 def q(qid):
3374 try:
3375 return quality_ids.index(qid)
3376 except ValueError:
3377 return -1
3378 return q
3379
3380
3381 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3382
3383
3384 DEFAULT_OUTTMPL = {
3385 'default': '%(title)s [%(id)s].%(ext)s',
3386 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3387 }
3388 OUTTMPL_TYPES = {
3389 'chapter': None,
3390 'subtitle': None,
3391 'thumbnail': None,
3392 'description': 'description',
3393 'annotation': 'annotations.xml',
3394 'infojson': 'info.json',
3395 'link': None,
3396 'pl_video': None,
3397 'pl_thumbnail': None,
3398 'pl_description': 'description',
3399 'pl_infojson': 'info.json',
3400 }
3401
3402 # As of [1], the format syntax is:
3403 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3404 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3405 STR_FORMAT_RE_TMPL = r'''(?x)
3406 (?<!%)(?P<prefix>(?:%%)*)
3407 %
3408 (?P<has_key>\((?P<key>{0})\))?
3409 (?P<format>
3410 (?P<conversion>[#0\-+ ]+)?
3411 (?P<min_width>\d+)?
3412 (?P<precision>\.\d+)?
3413 (?P<len_mod>[hlL])? # unused in python
3414 {1} # conversion type
3415 )
3416 '''
3417
3418
3419 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3420
3421
3422 def limit_length(s, length):
3423 """ Add ellipses to overly long strings """
3424 if s is None:
3425 return None
3426 ELLIPSES = '...'
3427 if len(s) > length:
3428 return s[:length - len(ELLIPSES)] + ELLIPSES
3429 return s
3430
3431
3432 def version_tuple(v):
3433 return tuple(int(e) for e in re.split(r'[-.]', v))
3434
3435
3436 def is_outdated_version(version, limit, assume_new=True):
3437 if not version:
3438 return not assume_new
3439 try:
3440 return version_tuple(version) < version_tuple(limit)
3441 except ValueError:
3442 return not assume_new
3443
3444
3445 def ytdl_is_updateable():
3446 """ Returns if yt-dlp can be updated with -U """
3447
3448 from .update import is_non_updateable
3449
3450 return not is_non_updateable()
3451
3452
3453 def args_to_str(args):
3454 # Get a short string representation for a subprocess command
3455 return ' '.join(compat_shlex_quote(a) for a in args)
3456
3457
3458 def error_to_compat_str(err):
3459 return str(err)
3460
3461
3462 def error_to_str(err):
3463 return f'{type(err).__name__}: {err}'
3464
3465
3466 def mimetype2ext(mt):
3467 if mt is None:
3468 return None
3469
3470 mt, _, params = mt.partition(';')
3471 mt = mt.strip()
3472
3473 FULL_MAP = {
3474 'audio/mp4': 'm4a',
3475 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3476 # since it is the most popular one
3477 'audio/mpeg': 'mp3',
3478 'audio/x-wav': 'wav',
3479 'audio/wav': 'wav',
3480 'audio/wave': 'wav',
3481 }
3482
3483 ext = FULL_MAP.get(mt)
3484 if ext is not None:
3485 return ext
3486
3487 SUBTYPE_MAP = {
3488 '3gpp': '3gp',
3489 'smptett+xml': 'tt',
3490 'ttaf+xml': 'dfxp',
3491 'ttml+xml': 'ttml',
3492 'x-flv': 'flv',
3493 'x-mp4-fragmented': 'mp4',
3494 'x-ms-sami': 'sami',
3495 'x-ms-wmv': 'wmv',
3496 'mpegurl': 'm3u8',
3497 'x-mpegurl': 'm3u8',
3498 'vnd.apple.mpegurl': 'm3u8',
3499 'dash+xml': 'mpd',
3500 'f4m+xml': 'f4m',
3501 'hds+xml': 'f4m',
3502 'vnd.ms-sstr+xml': 'ism',
3503 'quicktime': 'mov',
3504 'mp2t': 'ts',
3505 'x-wav': 'wav',
3506 'filmstrip+json': 'fs',
3507 'svg+xml': 'svg',
3508 }
3509
3510 _, _, subtype = mt.rpartition('/')
3511 ext = SUBTYPE_MAP.get(subtype.lower())
3512 if ext is not None:
3513 return ext
3514
3515 SUFFIX_MAP = {
3516 'json': 'json',
3517 'xml': 'xml',
3518 'zip': 'zip',
3519 'gzip': 'gz',
3520 }
3521
3522 _, _, suffix = subtype.partition('+')
3523 ext = SUFFIX_MAP.get(suffix)
3524 if ext is not None:
3525 return ext
3526
3527 return subtype.replace('+', '.')
3528
3529
3530 def ext2mimetype(ext_or_url):
3531 if not ext_or_url:
3532 return None
3533 if '.' not in ext_or_url:
3534 ext_or_url = f'file.{ext_or_url}'
3535 return mimetypes.guess_type(ext_or_url)[0]
3536
3537
3538 def parse_codecs(codecs_str):
3539 # http://tools.ietf.org/html/rfc6381
3540 if not codecs_str:
3541 return {}
3542 split_codecs = list(filter(None, map(
3543 str.strip, codecs_str.strip().strip(',').split(','))))
3544 vcodec, acodec, scodec, hdr = None, None, None, None
3545 for full_codec in split_codecs:
3546 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3547 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3548 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3549 if vcodec:
3550 continue
3551 vcodec = full_codec
3552 if parts[0] in ('dvh1', 'dvhe'):
3553 hdr = 'DV'
3554 elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
3555 hdr = 'HDR10'
3556 elif parts[:2] == ['vp9', '2']:
3557 hdr = 'HDR10'
3558 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
3559 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3560 acodec = acodec or full_codec
3561 elif parts[0] in ('stpp', 'wvtt'):
3562 scodec = scodec or full_codec
3563 else:
3564 write_string(f'WARNING: Unknown codec {full_codec}\n')
3565 if vcodec or acodec or scodec:
3566 return {
3567 'vcodec': vcodec or 'none',
3568 'acodec': acodec or 'none',
3569 'dynamic_range': hdr,
3570 **({'scodec': scodec} if scodec is not None else {}),
3571 }
3572 elif len(split_codecs) == 2:
3573 return {
3574 'vcodec': split_codecs[0],
3575 'acodec': split_codecs[1],
3576 }
3577 return {}
3578
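# Usage sketch with a typical RFC 6381 codecs string (values illustrative):
# >>> parse_codecs('avc1.64001f, mp4a.40.2')
# {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}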
3579
3580 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3581 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3582
3583 allow_mkv = not preferences or 'mkv' in preferences
3584
3585 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3586 return 'mkv' # TODO: is there any other format that allows this?
3587
3588 # TODO: Not all codecs supported by parse_codecs are handled here
3589 COMPATIBLE_CODECS = {
3590 'mp4': {
3591 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
3592 'h264', 'aacl', 'ec-3', # Set in ISM
3593 },
3594 'webm': {
3595 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3596 'vp9x', 'vp8x', # in the webm spec
3597 },
3598 }
3599
3600 sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3601 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3602
3603 for ext in preferences or COMPATIBLE_CODECS.keys():
3604 codec_set = COMPATIBLE_CODECS.get(ext, set())
3605 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3606 return ext
3607
3608 COMPATIBLE_EXTS = (
3609 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3610 {'webm'},
3611 )
3612 for ext in preferences or vexts:
3613 current_exts = {ext, *vexts, *aexts}
3614 if ext == 'mkv' or current_exts == {ext} or any(
3615 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3616 return ext
3617 return 'mkv' if allow_mkv else preferences[-1]
3618
3619
3620 def urlhandle_detect_ext(url_handle):
3621 getheader = url_handle.headers.get
3622
3623 cd = getheader('Content-Disposition')
3624 if cd:
3625 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3626 if m:
3627 e = determine_ext(m.group('filename'), default_ext=None)
3628 if e:
3629 return e
3630
3631 return mimetype2ext(getheader('Content-Type'))
3632
3633
3634 def encode_data_uri(data, mime_type):
3635 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3636
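# E.g. (illustrative):
# >>> encode_data_uri(b'hello', 'text/plain')
# 'data:text/plain;base64,aGVsbG8='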
3637
3638 def age_restricted(content_limit, age_limit):
3639 """ Returns True iff the content should be blocked """
3640
3641 if age_limit is None: # No limit set
3642 return False
3643 if content_limit is None:
3644 return False # Content available for everyone
3645 return age_limit < content_limit
3646
3647
3648 # List of known byte-order-marks (BOM). Order matters: the UTF-32 BOMs must be tested before the UTF-16 BOMs they start with
3649 BOMS = [
3650 (b'\xef\xbb\xbf', 'utf-8'),
3651 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3652 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3653 (b'\xff\xfe', 'utf-16-le'),
3654 (b'\xfe\xff', 'utf-16-be'),
3655 ]
3656
3657
3658 def is_html(first_bytes):
3659 """ Detect whether a file contains HTML by examining its first bytes. """
3660
3661 encoding = 'utf-8'
3662 for bom, enc in BOMS:
3663 while first_bytes.startswith(bom):
3664 encoding, first_bytes = enc, first_bytes[len(bom):]
3665
3666 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3667
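# A BOM-prefixed document is still detected (illustrative input):
# >>> bool(is_html(b'\xef\xbb\xbf  <!DOCTYPE html>'))
# True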
3668
3669 def determine_protocol(info_dict):
3670 protocol = info_dict.get('protocol')
3671 if protocol is not None:
3672 return protocol
3673
3674 url = sanitize_url(info_dict['url'])
3675 if url.startswith('rtmp'):
3676 return 'rtmp'
3677 elif url.startswith('mms'):
3678 return 'mms'
3679 elif url.startswith('rtsp'):
3680 return 'rtsp'
3681
3682 ext = determine_ext(url)
3683 if ext == 'm3u8':
3684 return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
3685 elif ext == 'f4m':
3686 return 'f4m'
3687
3688 return urllib.parse.urlparse(url).scheme
3689
3690
3691 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3692 """ Render a list of rows, each as a list of values.
3693 Text after a \t will be right aligned """
3694 def width(string):
3695 return len(remove_terminal_sequences(string).replace('\t', ''))
3696
3697 def get_max_lens(table):
3698 return [max(width(str(v)) for v in col) for col in zip(*table)]
3699
3700 def filter_using_list(row, filterArray):
3701 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3702
3703 max_lens = get_max_lens(data) if hide_empty else []
3704 header_row = filter_using_list(header_row, max_lens)
3705 data = [filter_using_list(row, max_lens) for row in data]
3706
3707 table = [header_row] + data
3708 max_lens = get_max_lens(table)
3709 extra_gap += 1
3710 if delim:
3711 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3712 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3713 for row in table:
3714 for pos, text in enumerate(map(str, row)):
3715 if '\t' in text:
3716 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3717 else:
3718 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3719 ret = '\n'.join(''.join(row).rstrip() for row in table)
3720 return ret
3721
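# Rendering sketch (illustrative data); each column is padded to its widest
# cell plus the gap:
# >>> print(render_table(['a', 'b'], [['1', '22'], ['333', '4']]))
# a   b
# 1   22
# 333 4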
3722
3723 def _match_one(filter_part, dct, incomplete):
3724 # TODO: Generalize code with YoutubeDL._build_format_filter
3725 STRING_OPERATORS = {
3726 '*=': operator.contains,
3727 '^=': lambda attr, value: attr.startswith(value),
3728 '$=': lambda attr, value: attr.endswith(value),
3729 '~=': lambda attr, value: re.search(value, attr),
3730 }
3731 COMPARISON_OPERATORS = {
3732 **STRING_OPERATORS,
3733 '<=': operator.le, # "<=" must be defined above "<"
3734 '<': operator.lt,
3735 '>=': operator.ge,
3736 '>': operator.gt,
3737 '=': operator.eq,
3738 }
3739
3740 if isinstance(incomplete, bool):
3741 is_incomplete = lambda _: incomplete
3742 else:
3743 is_incomplete = lambda k: k in incomplete
3744
3745 operator_rex = re.compile(r'''(?x)
3746 (?P<key>[a-z_]+)
3747 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3748 (?:
3749 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3750 (?P<strval>.+?)
3751 )
3752 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3753 m = operator_rex.fullmatch(filter_part.strip())
3754 if m:
3755 m = m.groupdict()
3756 unnegated_op = COMPARISON_OPERATORS[m['op']]
3757 if m['negation']:
3758 op = lambda attr, value: not unnegated_op(attr, value)
3759 else:
3760 op = unnegated_op
3761 comparison_value = m['quotedstrval'] or m['strval']
3762 if m['quote']:
3763 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3764 actual_value = dct.get(m['key'])
3765 numeric_comparison = None
3766 if isinstance(actual_value, (int, float)):
3767 # If the original field is a string and the matching comparison value is
3768 # a number we should respect the origin of the original field
3769 # and process comparison value as a string (see
3770 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3771 try:
3772 numeric_comparison = int(comparison_value)
3773 except ValueError:
3774 numeric_comparison = parse_filesize(comparison_value)
3775 if numeric_comparison is None:
3776 numeric_comparison = parse_filesize(f'{comparison_value}B')
3777 if numeric_comparison is None:
3778 numeric_comparison = parse_duration(comparison_value)
3779 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3780 raise ValueError('Operator %s only supports string values!' % m['op'])
3781 if actual_value is None:
3782 return is_incomplete(m['key']) or m['none_inclusive']
3783 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3784
3785 UNARY_OPERATORS = {
3786 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3787 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3788 }
3789 operator_rex = re.compile(r'''(?x)
3790 (?P<op>%s)\s*(?P<key>[a-z_]+)
3791 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3792 m = operator_rex.fullmatch(filter_part.strip())
3793 if m:
3794 op = UNARY_OPERATORS[m.group('op')]
3795 actual_value = dct.get(m.group('key'))
3796 if is_incomplete(m.group('key')) and actual_value is None:
3797 return True
3798 return op(actual_value)
3799
3800 raise ValueError('Invalid filter part %r' % filter_part)
3801
3802
3803 def match_str(filter_str, dct, incomplete=False):
3804 """ Filter a dictionary with a simple string syntax.
3805 @returns Whether the filter passes
3806 @param incomplete Set of keys that are expected to be missing from dct.
3807 Can be True/False to indicate all/none of the keys may be missing.
3808 All conditions on incomplete keys pass if the key is missing
3809 """
3810 return all(
3811 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3812 for filter_part in re.split(r'(?<!\\)&', filter_str))
3813
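# Filter sketch (illustrative dict); '&'-separated conditions must all pass:
# >>> match_str('duration > 60 & description', {'duration': 90, 'description': 'x'})
# True
# >>> match_str('!is_live', {'is_live': True})
# False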
3814
3815 def match_filter_func(filters):
3816 if not filters:
3817 return None
3818 filters = set(variadic(filters))
3819
3820 interactive = '-' in filters
3821 if interactive:
3822 filters.remove('-')
3823
3824 def _match_func(info_dict, incomplete=False):
3825 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3826 return NO_DEFAULT if interactive and not incomplete else None
3827 else:
3828 video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3829 filter_str = ') | ('.join(map(str.strip, filters))
3830 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3831 return _match_func
3832
3833
3834 class download_range_func:
3835 def __init__(self, chapters, ranges):
3836 self.chapters, self.ranges = chapters, ranges
3837
3838 def __call__(self, info_dict, ydl):
3839 if not self.ranges and not self.chapters:
3840 yield {}
3841
3842 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3843 else 'Cannot match chapters since chapter information is unavailable')
3844 for regex in self.chapters or []:
3845 for i, chapter in enumerate(info_dict.get('chapters') or []):
3846 if re.search(regex, chapter['title']):
3847 warning = None
3848 yield {**chapter, 'index': i}
3849 if self.chapters and warning:
3850 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3851
3852 yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3853
3854 def __eq__(self, other):
3855 return (isinstance(other, download_range_func)
3856 and self.chapters == other.chapters and self.ranges == other.ranges)
3857
3858
3859 def parse_dfxp_time_expr(time_expr):
3860 if not time_expr:
3861 return
3862
3863 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3864 if mobj:
3865 return float(mobj.group('time_offset'))
3866
3867 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3868 if mobj:
3869 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3870
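# Both supported syntaxes, offset time and clock time (values illustrative):
# >>> parse_dfxp_time_expr('1.5s')
# 1.5
# >>> parse_dfxp_time_expr('00:01:30.5')
# 90.5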
3871
3872 def srt_subtitles_timecode(seconds):
3873 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3874
3875
3876 def ass_subtitles_timecode(seconds):
3877 time = timetuple_from_msec(seconds * 1000)
3878 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3879
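# The same instant in both formats (illustrative value):
# >>> srt_subtitles_timecode(61.5)
# '00:01:01,500'
# >>> ass_subtitles_timecode(61.5)
# '0:01:01.50'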
3880
3881 def dfxp2srt(dfxp_data):
3882 '''
3883 @param dfxp_data A bytes-like object containing DFXP data
3884 @returns A unicode object containing converted SRT data
3885 '''
3886 LEGACY_NAMESPACES = (
3887 (b'http://www.w3.org/ns/ttml', [
3888 b'http://www.w3.org/2004/11/ttaf1',
3889 b'http://www.w3.org/2006/04/ttaf1',
3890 b'http://www.w3.org/2006/10/ttaf1',
3891 ]),
3892 (b'http://www.w3.org/ns/ttml#styling', [
3893 b'http://www.w3.org/ns/ttml#style',
3894 ]),
3895 )
3896
3897 SUPPORTED_STYLING = [
3898 'color',
3899 'fontFamily',
3900 'fontSize',
3901 'fontStyle',
3902 'fontWeight',
3903 'textDecoration'
3904 ]
3905
3906 _x = functools.partial(xpath_with_ns, ns_map={
3907 'xml': 'http://www.w3.org/XML/1998/namespace',
3908 'ttml': 'http://www.w3.org/ns/ttml',
3909 'tts': 'http://www.w3.org/ns/ttml#styling',
3910 })
3911
3912 styles = {}
3913 default_style = {}
3914
3915 class TTMLPElementParser:
3916 _out = ''
3917 _unclosed_elements = []
3918 _applied_styles = []
3919
3920 def start(self, tag, attrib):
3921 if tag in (_x('ttml:br'), 'br'):
3922 self._out += '\n'
3923 else:
3924 unclosed_elements = []
3925 style = {}
3926 element_style_id = attrib.get('style')
3927 if default_style:
3928 style.update(default_style)
3929 if element_style_id:
3930 style.update(styles.get(element_style_id, {}))
3931 for prop in SUPPORTED_STYLING:
3932 prop_val = attrib.get(_x('tts:' + prop))
3933 if prop_val:
3934 style[prop] = prop_val
3935 if style:
3936 font = ''
3937 for k, v in sorted(style.items()):
3938 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3939 continue
3940 if k == 'color':
3941 font += ' color="%s"' % v
3942 elif k == 'fontSize':
3943 font += ' size="%s"' % v
3944 elif k == 'fontFamily':
3945 font += ' face="%s"' % v
3946 elif k == 'fontWeight' and v == 'bold':
3947 self._out += '<b>'
3948 unclosed_elements.append('b')
3949 elif k == 'fontStyle' and v == 'italic':
3950 self._out += '<i>'
3951 unclosed_elements.append('i')
3952 elif k == 'textDecoration' and v == 'underline':
3953 self._out += '<u>'
3954 unclosed_elements.append('u')
3955 if font:
3956 self._out += '<font' + font + '>'
3957 unclosed_elements.append('font')
3958 applied_style = {}
3959 if self._applied_styles:
3960 applied_style.update(self._applied_styles[-1])
3961 applied_style.update(style)
3962 self._applied_styles.append(applied_style)
3963 self._unclosed_elements.append(unclosed_elements)
3964
3965 def end(self, tag):
3966 if tag not in (_x('ttml:br'), 'br'):
3967 unclosed_elements = self._unclosed_elements.pop()
3968 for element in reversed(unclosed_elements):
3969 self._out += '</%s>' % element
3970 if unclosed_elements and self._applied_styles:
3971 self._applied_styles.pop()
3972
3973 def data(self, data):
3974 self._out += data
3975
3976 def close(self):
3977 return self._out.strip()
3978
3979 def parse_node(node):
3980 target = TTMLPElementParser()
3981 parser = xml.etree.ElementTree.XMLParser(target=target)
3982 parser.feed(xml.etree.ElementTree.tostring(node))
3983 return parser.close()
3984
3985 for k, v in LEGACY_NAMESPACES:
3986 for ns in v:
3987 dfxp_data = dfxp_data.replace(ns, k)
3988
3989 dfxp = compat_etree_fromstring(dfxp_data)
3990 out = []
3991 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3992
3993 if not paras:
3994 raise ValueError('Invalid dfxp/TTML subtitle')
3995
3996 repeat = False
3997 while True:
3998 for style in dfxp.findall(_x('.//ttml:style')):
3999 style_id = style.get('id') or style.get(_x('xml:id'))
4000 if not style_id:
4001 continue
4002 parent_style_id = style.get('style')
4003 if parent_style_id:
4004 if parent_style_id not in styles:
4005 repeat = True
4006 continue
4007 styles[style_id] = styles[parent_style_id].copy()
4008 for prop in SUPPORTED_STYLING:
4009 prop_val = style.get(_x('tts:' + prop))
4010 if prop_val:
4011 styles.setdefault(style_id, {})[prop] = prop_val
4012 if repeat:
4013 repeat = False
4014 else:
4015 break
4016
4017 for p in ('body', 'div'):
4018 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4019 if ele is None:
4020 continue
4021 style = styles.get(ele.get('style'))
4022 if not style:
4023 continue
4024 default_style.update(style)
4025
4026 for para, index in zip(paras, itertools.count(1)):
4027 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4028 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4029 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4030 if begin_time is None:
4031 continue
4032 if not end_time:
4033 if not dur:
4034 continue
4035 end_time = begin_time + dur
4036 out.append('%d\n%s --> %s\n%s\n\n' % (
4037 index,
4038 srt_subtitles_timecode(begin_time),
4039 srt_subtitles_timecode(end_time),
4040 parse_node(para)))
4041
4042 return ''.join(out)
4043
4044
4045 def cli_option(params, command_option, param, separator=None):
4046 param = params.get(param)
4047 return ([] if param is None
4048 else [command_option, str(param)] if separator is None
4049 else [f'{command_option}{separator}{param}'])
4050
4051
4052 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4053 param = params.get(param)
4054 assert param in (True, False, None)
4055 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
4056
4057
4058 def cli_valueless_option(params, command_option, param, expected_value=True):
4059 return [command_option] if params.get(param) == expected_value else []
4060
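# Usage sketch (the option names are illustrative, not tied to any real tool):
# >>> cli_option({'proxy': 'socks5://127.0.0.1'}, '--proxy', 'proxy')
# ['--proxy', 'socks5://127.0.0.1']
# >>> cli_bool_option({'check_cert': True}, '--check-certificate', 'check_cert', 'true', 'false', '=')
# ['--check-certificate=true']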
4061
4062 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4063 if isinstance(argdict, (list, tuple)): # for backward compatibility
4064 if use_compat:
4065 return argdict
4066 else:
4067 argdict = None
4068 if argdict is None:
4069 return default
4070 assert isinstance(argdict, dict)
4071
4072 assert isinstance(keys, (list, tuple))
4073 for key_list in keys:
4074 arg_list = list(filter(
4075 lambda x: x is not None,
4076 [argdict.get(key.lower()) for key in variadic(key_list)]))
4077 if arg_list:
4078 return [arg for args in arg_list for arg in args]
4079 return default
4080
4081
4082 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
4083 main_key, exe = main_key.lower(), exe.lower()
4084 root_key = exe if main_key == exe else f'{main_key}+{exe}'
4085 keys = [f'{root_key}{k}' for k in (keys or [''])]
4086 if root_key in keys:
4087 if main_key != exe:
4088 keys.append((main_key, exe))
4089 keys.append('default')
4090 else:
4091 use_compat = False
4092 return cli_configuration_args(argdict, keys, default, use_compat)
4093
4094
4095 class ISO639Utils:
4096 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4097 _lang_map = {
4098 'aa': 'aar',
4099 'ab': 'abk',
4100 'ae': 'ave',
4101 'af': 'afr',
4102 'ak': 'aka',
4103 'am': 'amh',
4104 'an': 'arg',
4105 'ar': 'ara',
4106 'as': 'asm',
4107 'av': 'ava',
4108 'ay': 'aym',
4109 'az': 'aze',
4110 'ba': 'bak',
4111 'be': 'bel',
4112 'bg': 'bul',
4113 'bh': 'bih',
4114 'bi': 'bis',
4115 'bm': 'bam',
4116 'bn': 'ben',
4117 'bo': 'bod',
4118 'br': 'bre',
4119 'bs': 'bos',
4120 'ca': 'cat',
4121 'ce': 'che',
4122 'ch': 'cha',
4123 'co': 'cos',
4124 'cr': 'cre',
4125 'cs': 'ces',
4126 'cu': 'chu',
4127 'cv': 'chv',
4128 'cy': 'cym',
4129 'da': 'dan',
4130 'de': 'deu',
4131 'dv': 'div',
4132 'dz': 'dzo',
4133 'ee': 'ewe',
4134 'el': 'ell',
4135 'en': 'eng',
4136 'eo': 'epo',
4137 'es': 'spa',
4138 'et': 'est',
4139 'eu': 'eus',
4140 'fa': 'fas',
4141 'ff': 'ful',
4142 'fi': 'fin',
4143 'fj': 'fij',
4144 'fo': 'fao',
4145 'fr': 'fra',
4146 'fy': 'fry',
4147 'ga': 'gle',
4148 'gd': 'gla',
4149 'gl': 'glg',
4150 'gn': 'grn',
4151 'gu': 'guj',
4152 'gv': 'glv',
4153 'ha': 'hau',
4154 'he': 'heb',
4155 'iw': 'heb', # Replaced by he in 1989 revision
4156 'hi': 'hin',
4157 'ho': 'hmo',
4158 'hr': 'hrv',
4159 'ht': 'hat',
4160 'hu': 'hun',
4161 'hy': 'hye',
4162 'hz': 'her',
4163 'ia': 'ina',
4164 'id': 'ind',
4165 'in': 'ind', # Replaced by id in 1989 revision
4166 'ie': 'ile',
4167 'ig': 'ibo',
4168 'ii': 'iii',
4169 'ik': 'ipk',
4170 'io': 'ido',
4171 'is': 'isl',
4172 'it': 'ita',
4173 'iu': 'iku',
4174 'ja': 'jpn',
4175 'jv': 'jav',
4176 'ka': 'kat',
4177 'kg': 'kon',
4178 'ki': 'kik',
4179 'kj': 'kua',
4180 'kk': 'kaz',
4181 'kl': 'kal',
4182 'km': 'khm',
4183 'kn': 'kan',
4184 'ko': 'kor',
4185 'kr': 'kau',
4186 'ks': 'kas',
4187 'ku': 'kur',
4188 'kv': 'kom',
4189 'kw': 'cor',
4190 'ky': 'kir',
4191 'la': 'lat',
4192 'lb': 'ltz',
4193 'lg': 'lug',
4194 'li': 'lim',
4195 'ln': 'lin',
4196 'lo': 'lao',
4197 'lt': 'lit',
4198 'lu': 'lub',
4199 'lv': 'lav',
4200 'mg': 'mlg',
4201 'mh': 'mah',
4202 'mi': 'mri',
4203 'mk': 'mkd',
4204 'ml': 'mal',
4205 'mn': 'mon',
4206 'mr': 'mar',
4207 'ms': 'msa',
4208 'mt': 'mlt',
4209 'my': 'mya',
4210 'na': 'nau',
4211 'nb': 'nob',
4212 'nd': 'nde',
4213 'ne': 'nep',
4214 'ng': 'ndo',
4215 'nl': 'nld',
4216 'nn': 'nno',
4217 'no': 'nor',
4218 'nr': 'nbl',
4219 'nv': 'nav',
4220 'ny': 'nya',
4221 'oc': 'oci',
4222 'oj': 'oji',
4223 'om': 'orm',
4224 'or': 'ori',
4225 'os': 'oss',
4226 'pa': 'pan',
4227 'pi': 'pli',
4228 'pl': 'pol',
4229 'ps': 'pus',
4230 'pt': 'por',
4231 'qu': 'que',
4232 'rm': 'roh',
4233 'rn': 'run',
4234 'ro': 'ron',
4235 'ru': 'rus',
4236 'rw': 'kin',
4237 'sa': 'san',
4238 'sc': 'srd',
4239 'sd': 'snd',
4240 'se': 'sme',
4241 'sg': 'sag',
4242 'si': 'sin',
4243 'sk': 'slk',
4244 'sl': 'slv',
4245 'sm': 'smo',
4246 'sn': 'sna',
4247 'so': 'som',
4248 'sq': 'sqi',
4249 'sr': 'srp',
4250 'ss': 'ssw',
4251 'st': 'sot',
4252 'su': 'sun',
4253 'sv': 'swe',
4254 'sw': 'swa',
4255 'ta': 'tam',
4256 'te': 'tel',
4257 'tg': 'tgk',
4258 'th': 'tha',
4259 'ti': 'tir',
4260 'tk': 'tuk',
4261 'tl': 'tgl',
4262 'tn': 'tsn',
4263 'to': 'ton',
4264 'tr': 'tur',
4265 'ts': 'tso',
4266 'tt': 'tat',
4267 'tw': 'twi',
4268 'ty': 'tah',
4269 'ug': 'uig',
4270 'uk': 'ukr',
4271 'ur': 'urd',
4272 'uz': 'uzb',
4273 've': 'ven',
4274 'vi': 'vie',
4275 'vo': 'vol',
4276 'wa': 'wln',
4277 'wo': 'wol',
4278 'xh': 'xho',
4279 'yi': 'yid',
4280 'ji': 'yid', # Replaced by yi in 1989 revision
4281 'yo': 'yor',
4282 'za': 'zha',
4283 'zh': 'zho',
4284 'zu': 'zul',
4285 }
4286
4287 @classmethod
4288 def short2long(cls, code):
4289 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4290 return cls._lang_map.get(code[:2])
4291
4292 @classmethod
4293 def long2short(cls, code):
4294 """Convert language code from ISO 639-2/T to ISO 639-1"""
4295 for short_name, long_name in cls._lang_map.items():
4296 if long_name == code:
4297 return short_name
4298
4299
4300 class ISO3166Utils:
4301 # From http://data.okfn.org/data/core/country-list
4302 _country_map = {
4303 'AF': 'Afghanistan',
4304 'AX': 'Åland Islands',
4305 'AL': 'Albania',
4306 'DZ': 'Algeria',
4307 'AS': 'American Samoa',
4308 'AD': 'Andorra',
4309 'AO': 'Angola',
4310 'AI': 'Anguilla',
4311 'AQ': 'Antarctica',
4312 'AG': 'Antigua and Barbuda',
4313 'AR': 'Argentina',
4314 'AM': 'Armenia',
4315 'AW': 'Aruba',
4316 'AU': 'Australia',
4317 'AT': 'Austria',
4318 'AZ': 'Azerbaijan',
4319 'BS': 'Bahamas',
4320 'BH': 'Bahrain',
4321 'BD': 'Bangladesh',
4322 'BB': 'Barbados',
4323 'BY': 'Belarus',
4324 'BE': 'Belgium',
4325 'BZ': 'Belize',
4326 'BJ': 'Benin',
4327 'BM': 'Bermuda',
4328 'BT': 'Bhutan',
4329 'BO': 'Bolivia, Plurinational State of',
4330 'BQ': 'Bonaire, Sint Eustatius and Saba',
4331 'BA': 'Bosnia and Herzegovina',
4332 'BW': 'Botswana',
4333 'BV': 'Bouvet Island',
4334 'BR': 'Brazil',
4335 'IO': 'British Indian Ocean Territory',
4336 'BN': 'Brunei Darussalam',
4337 'BG': 'Bulgaria',
4338 'BF': 'Burkina Faso',
4339 'BI': 'Burundi',
4340 'KH': 'Cambodia',
4341 'CM': 'Cameroon',
4342 'CA': 'Canada',
4343 'CV': 'Cape Verde',
4344 'KY': 'Cayman Islands',
4345 'CF': 'Central African Republic',
4346 'TD': 'Chad',
4347 'CL': 'Chile',
4348 'CN': 'China',
4349 'CX': 'Christmas Island',
4350 'CC': 'Cocos (Keeling) Islands',
4351 'CO': 'Colombia',
4352 'KM': 'Comoros',
4353 'CG': 'Congo',
4354 'CD': 'Congo, the Democratic Republic of the',
4355 'CK': 'Cook Islands',
4356 'CR': 'Costa Rica',
4357 'CI': 'Côte d\'Ivoire',
4358 'HR': 'Croatia',
4359 'CU': 'Cuba',
4360 'CW': 'Curaçao',
4361 'CY': 'Cyprus',
4362 'CZ': 'Czech Republic',
4363 'DK': 'Denmark',
4364 'DJ': 'Djibouti',
4365 'DM': 'Dominica',
4366 'DO': 'Dominican Republic',
4367 'EC': 'Ecuador',
4368 'EG': 'Egypt',
4369 'SV': 'El Salvador',
4370 'GQ': 'Equatorial Guinea',
4371 'ER': 'Eritrea',
4372 'EE': 'Estonia',
4373 'ET': 'Ethiopia',
4374 'FK': 'Falkland Islands (Malvinas)',
4375 'FO': 'Faroe Islands',
4376 'FJ': 'Fiji',
4377 'FI': 'Finland',
4378 'FR': 'France',
4379 'GF': 'French Guiana',
4380 'PF': 'French Polynesia',
4381 'TF': 'French Southern Territories',
4382 'GA': 'Gabon',
4383 'GM': 'Gambia',
4384 'GE': 'Georgia',
4385 'DE': 'Germany',
4386 'GH': 'Ghana',
4387 'GI': 'Gibraltar',
4388 'GR': 'Greece',
4389 'GL': 'Greenland',
4390 'GD': 'Grenada',
4391 'GP': 'Guadeloupe',
4392 'GU': 'Guam',
4393 'GT': 'Guatemala',
4394 'GG': 'Guernsey',
4395 'GN': 'Guinea',
4396 'GW': 'Guinea-Bissau',
4397 'GY': 'Guyana',
4398 'HT': 'Haiti',
4399 'HM': 'Heard Island and McDonald Islands',
4400 'VA': 'Holy See (Vatican City State)',
4401 'HN': 'Honduras',
4402 'HK': 'Hong Kong',
4403 'HU': 'Hungary',
4404 'IS': 'Iceland',
4405 'IN': 'India',
4406 'ID': 'Indonesia',
4407 'IR': 'Iran, Islamic Republic of',
4408 'IQ': 'Iraq',
4409 'IE': 'Ireland',
4410 'IM': 'Isle of Man',
4411 'IL': 'Israel',
4412 'IT': 'Italy',
4413 'JM': 'Jamaica',
4414 'JP': 'Japan',
4415 'JE': 'Jersey',
4416 'JO': 'Jordan',
4417 'KZ': 'Kazakhstan',
4418 'KE': 'Kenya',
4419 'KI': 'Kiribati',
4420 'KP': 'Korea, Democratic People\'s Republic of',
4421 'KR': 'Korea, Republic of',
4422 'KW': 'Kuwait',
4423 'KG': 'Kyrgyzstan',
4424 'LA': 'Lao People\'s Democratic Republic',
4425 'LV': 'Latvia',
4426 'LB': 'Lebanon',
4427 'LS': 'Lesotho',
4428 'LR': 'Liberia',
4429 'LY': 'Libya',
4430 'LI': 'Liechtenstein',
4431 'LT': 'Lithuania',
4432 'LU': 'Luxembourg',
4433 'MO': 'Macao',
4434 'MK': 'Macedonia, the Former Yugoslav Republic of',
4435 'MG': 'Madagascar',
4436 'MW': 'Malawi',
4437 'MY': 'Malaysia',
4438 'MV': 'Maldives',
4439 'ML': 'Mali',
4440 'MT': 'Malta',
4441 'MH': 'Marshall Islands',
4442 'MQ': 'Martinique',
4443 'MR': 'Mauritania',
4444 'MU': 'Mauritius',
4445 'YT': 'Mayotte',
4446 'MX': 'Mexico',
4447 'FM': 'Micronesia, Federated States of',
4448 'MD': 'Moldova, Republic of',
4449 'MC': 'Monaco',
4450 'MN': 'Mongolia',
4451 'ME': 'Montenegro',
4452 'MS': 'Montserrat',
4453 'MA': 'Morocco',
4454 'MZ': 'Mozambique',
4455 'MM': 'Myanmar',
4456 'NA': 'Namibia',
4457 'NR': 'Nauru',
4458 'NP': 'Nepal',
4459 'NL': 'Netherlands',
4460 'NC': 'New Caledonia',
4461 'NZ': 'New Zealand',
4462 'NI': 'Nicaragua',
4463 'NE': 'Niger',
4464 'NG': 'Nigeria',
4465 'NU': 'Niue',
4466 'NF': 'Norfolk Island',
4467 'MP': 'Northern Mariana Islands',
4468 'NO': 'Norway',
4469 'OM': 'Oman',
4470 'PK': 'Pakistan',
4471 'PW': 'Palau',
4472 'PS': 'Palestine, State of',
4473 'PA': 'Panama',
4474 'PG': 'Papua New Guinea',
4475 'PY': 'Paraguay',
4476 'PE': 'Peru',
4477 'PH': 'Philippines',
4478 'PN': 'Pitcairn',
4479 'PL': 'Poland',
4480 'PT': 'Portugal',
4481 'PR': 'Puerto Rico',
4482 'QA': 'Qatar',
4483 'RE': 'Réunion',
4484 'RO': 'Romania',
4485 'RU': 'Russian Federation',
4486 'RW': 'Rwanda',
4487 'BL': 'Saint Barthélemy',
4488 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4489 'KN': 'Saint Kitts and Nevis',
4490 'LC': 'Saint Lucia',
4491 'MF': 'Saint Martin (French part)',
4492 'PM': 'Saint Pierre and Miquelon',
4493 'VC': 'Saint Vincent and the Grenadines',
4494 'WS': 'Samoa',
4495 'SM': 'San Marino',
4496 'ST': 'Sao Tome and Principe',
4497 'SA': 'Saudi Arabia',
4498 'SN': 'Senegal',
4499 'RS': 'Serbia',
4500 'SC': 'Seychelles',
4501 'SL': 'Sierra Leone',
4502 'SG': 'Singapore',
4503 'SX': 'Sint Maarten (Dutch part)',
4504 'SK': 'Slovakia',
4505 'SI': 'Slovenia',
4506 'SB': 'Solomon Islands',
4507 'SO': 'Somalia',
4508 'ZA': 'South Africa',
4509 'GS': 'South Georgia and the South Sandwich Islands',
4510 'SS': 'South Sudan',
4511 'ES': 'Spain',
4512 'LK': 'Sri Lanka',
4513 'SD': 'Sudan',
4514 'SR': 'Suriname',
4515 'SJ': 'Svalbard and Jan Mayen',
4516 'SZ': 'Swaziland',
4517 'SE': 'Sweden',
4518 'CH': 'Switzerland',
4519 'SY': 'Syrian Arab Republic',
4520 'TW': 'Taiwan, Province of China',
4521 'TJ': 'Tajikistan',
4522 'TZ': 'Tanzania, United Republic of',
4523 'TH': 'Thailand',
4524 'TL': 'Timor-Leste',
4525 'TG': 'Togo',
4526 'TK': 'Tokelau',
4527 'TO': 'Tonga',
4528 'TT': 'Trinidad and Tobago',
4529 'TN': 'Tunisia',
4530 'TR': 'Turkey',
4531 'TM': 'Turkmenistan',
4532 'TC': 'Turks and Caicos Islands',
4533 'TV': 'Tuvalu',
4534 'UG': 'Uganda',
4535 'UA': 'Ukraine',
4536 'AE': 'United Arab Emirates',
4537 'GB': 'United Kingdom',
4538 'US': 'United States',
4539 'UM': 'United States Minor Outlying Islands',
4540 'UY': 'Uruguay',
4541 'UZ': 'Uzbekistan',
4542 'VU': 'Vanuatu',
4543 'VE': 'Venezuela, Bolivarian Republic of',
4544 'VN': 'Viet Nam',
4545 'VG': 'Virgin Islands, British',
4546 'VI': 'Virgin Islands, U.S.',
4547 'WF': 'Wallis and Futuna',
4548 'EH': 'Western Sahara',
4549 'YE': 'Yemen',
4550 'ZM': 'Zambia',
4551 'ZW': 'Zimbabwe',
4552 # Not ISO 3166 codes, but used for IP blocks
4553 'AP': 'Asia/Pacific Region',
4554 'EU': 'Europe',
4555 }
4556
4557 @classmethod
4558 def short2full(cls, code):
4559 """Convert an ISO 3166-2 country code to the corresponding full name"""
4560 return cls._country_map.get(code.upper())
4561
4562
4563 class GeoUtils:
4564 # Major IPv4 address blocks per country
4565 _country_ip_map = {
4566 'AD': '46.172.224.0/19',
4567 'AE': '94.200.0.0/13',
4568 'AF': '149.54.0.0/17',
4569 'AG': '209.59.64.0/18',
4570 'AI': '204.14.248.0/21',
4571 'AL': '46.99.0.0/16',
4572 'AM': '46.70.0.0/15',
4573 'AO': '105.168.0.0/13',
4574 'AP': '182.50.184.0/21',
4575 'AQ': '23.154.160.0/24',
4576 'AR': '181.0.0.0/12',
4577 'AS': '202.70.112.0/20',
4578 'AT': '77.116.0.0/14',
4579 'AU': '1.128.0.0/11',
4580 'AW': '181.41.0.0/18',
4581 'AX': '185.217.4.0/22',
4582 'AZ': '5.197.0.0/16',
4583 'BA': '31.176.128.0/17',
4584 'BB': '65.48.128.0/17',
4585 'BD': '114.130.0.0/16',
4586 'BE': '57.0.0.0/8',
4587 'BF': '102.178.0.0/15',
4588 'BG': '95.42.0.0/15',
4589 'BH': '37.131.0.0/17',
4590 'BI': '154.117.192.0/18',
4591 'BJ': '137.255.0.0/16',
4592 'BL': '185.212.72.0/23',
4593 'BM': '196.12.64.0/18',
4594 'BN': '156.31.0.0/16',
4595 'BO': '161.56.0.0/16',
4596 'BQ': '161.0.80.0/20',
4597 'BR': '191.128.0.0/12',
4598 'BS': '24.51.64.0/18',
4599 'BT': '119.2.96.0/19',
4600 'BW': '168.167.0.0/16',
4601 'BY': '178.120.0.0/13',
4602 'BZ': '179.42.192.0/18',
4603 'CA': '99.224.0.0/11',
4604 'CD': '41.243.0.0/16',
4605 'CF': '197.242.176.0/21',
4606 'CG': '160.113.0.0/16',
4607 'CH': '85.0.0.0/13',
4608 'CI': '102.136.0.0/14',
4609 'CK': '202.65.32.0/19',
4610 'CL': '152.172.0.0/14',
4611 'CM': '102.244.0.0/14',
4612 'CN': '36.128.0.0/10',
4613 'CO': '181.240.0.0/12',
4614 'CR': '201.192.0.0/12',
4615 'CU': '152.206.0.0/15',
4616 'CV': '165.90.96.0/19',
4617 'CW': '190.88.128.0/17',
4618 'CY': '31.153.0.0/16',
4619 'CZ': '88.100.0.0/14',
4620 'DE': '53.0.0.0/8',
4621 'DJ': '197.241.0.0/17',
4622 'DK': '87.48.0.0/12',
4623 'DM': '192.243.48.0/20',
4624 'DO': '152.166.0.0/15',
4625 'DZ': '41.96.0.0/12',
4626 'EC': '186.68.0.0/15',
4627 'EE': '90.190.0.0/15',
4628 'EG': '156.160.0.0/11',
4629 'ER': '196.200.96.0/20',
4630 'ES': '88.0.0.0/11',
4631 'ET': '196.188.0.0/14',
4632 'EU': '2.16.0.0/13',
4633 'FI': '91.152.0.0/13',
4634 'FJ': '144.120.0.0/16',
4635 'FK': '80.73.208.0/21',
4636 'FM': '119.252.112.0/20',
4637 'FO': '88.85.32.0/19',
4638 'FR': '90.0.0.0/9',
4639 'GA': '41.158.0.0/15',
4640 'GB': '25.0.0.0/8',
4641 'GD': '74.122.88.0/21',
4642 'GE': '31.146.0.0/16',
4643 'GF': '161.22.64.0/18',
4644 'GG': '62.68.160.0/19',
4645 'GH': '154.160.0.0/12',
4646 'GI': '95.164.0.0/16',
4647 'GL': '88.83.0.0/19',
4648 'GM': '160.182.0.0/15',
4649 'GN': '197.149.192.0/18',
4650 'GP': '104.250.0.0/19',
4651 'GQ': '105.235.224.0/20',
4652 'GR': '94.64.0.0/13',
4653 'GT': '168.234.0.0/16',
4654 'GU': '168.123.0.0/16',
4655 'GW': '197.214.80.0/20',
4656 'GY': '181.41.64.0/18',
4657 'HK': '113.252.0.0/14',
4658 'HN': '181.210.0.0/16',
4659 'HR': '93.136.0.0/13',
4660 'HT': '148.102.128.0/17',
4661 'HU': '84.0.0.0/14',
4662 'ID': '39.192.0.0/10',
4663 'IE': '87.32.0.0/12',
4664 'IL': '79.176.0.0/13',
4665 'IM': '5.62.80.0/20',
4666 'IN': '117.192.0.0/10',
4667 'IO': '203.83.48.0/21',
4668 'IQ': '37.236.0.0/14',
4669 'IR': '2.176.0.0/12',
4670 'IS': '82.221.0.0/16',
4671 'IT': '79.0.0.0/10',
4672 'JE': '87.244.64.0/18',
4673 'JM': '72.27.0.0/17',
4674 'JO': '176.29.0.0/16',
4675 'JP': '133.0.0.0/8',
4676 'KE': '105.48.0.0/12',
4677 'KG': '158.181.128.0/17',
4678 'KH': '36.37.128.0/17',
4679 'KI': '103.25.140.0/22',
4680 'KM': '197.255.224.0/20',
4681 'KN': '198.167.192.0/19',
4682 'KP': '175.45.176.0/22',
4683 'KR': '175.192.0.0/10',
4684 'KW': '37.36.0.0/14',
4685 'KY': '64.96.0.0/15',
4686 'KZ': '2.72.0.0/13',
4687 'LA': '115.84.64.0/18',
4688 'LB': '178.135.0.0/16',
4689 'LC': '24.92.144.0/20',
4690 'LI': '82.117.0.0/19',
4691 'LK': '112.134.0.0/15',
4692 'LR': '102.183.0.0/16',
4693 'LS': '129.232.0.0/17',
4694 'LT': '78.56.0.0/13',
4695 'LU': '188.42.0.0/16',
4696 'LV': '46.109.0.0/16',
4697 'LY': '41.252.0.0/14',
4698 'MA': '105.128.0.0/11',
4699 'MC': '88.209.64.0/18',
4700 'MD': '37.246.0.0/16',
4701 'ME': '178.175.0.0/17',
4702 'MF': '74.112.232.0/21',
4703 'MG': '154.126.0.0/17',
4704 'MH': '117.103.88.0/21',
4705 'MK': '77.28.0.0/15',
4706 'ML': '154.118.128.0/18',
4707 'MM': '37.111.0.0/17',
4708 'MN': '49.0.128.0/17',
4709 'MO': '60.246.0.0/16',
4710 'MP': '202.88.64.0/20',
4711 'MQ': '109.203.224.0/19',
4712 'MR': '41.188.64.0/18',
4713 'MS': '208.90.112.0/22',
4714 'MT': '46.11.0.0/16',
4715 'MU': '105.16.0.0/12',
4716 'MV': '27.114.128.0/18',
4717 'MW': '102.70.0.0/15',
4718 'MX': '187.192.0.0/11',
4719 'MY': '175.136.0.0/13',
4720 'MZ': '197.218.0.0/15',
4721 'NA': '41.182.0.0/16',
4722 'NC': '101.101.0.0/18',
4723 'NE': '197.214.0.0/18',
4724 'NF': '203.17.240.0/22',
4725 'NG': '105.112.0.0/12',
4726 'NI': '186.76.0.0/15',
4727 'NL': '145.96.0.0/11',
4728 'NO': '84.208.0.0/13',
4729 'NP': '36.252.0.0/15',
4730 'NR': '203.98.224.0/19',
4731 'NU': '49.156.48.0/22',
4732 'NZ': '49.224.0.0/14',
4733 'OM': '5.36.0.0/15',
4734 'PA': '186.72.0.0/15',
4735 'PE': '186.160.0.0/14',
4736 'PF': '123.50.64.0/18',
4737 'PG': '124.240.192.0/19',
4738 'PH': '49.144.0.0/13',
4739 'PK': '39.32.0.0/11',
4740 'PL': '83.0.0.0/11',
4741 'PM': '70.36.0.0/20',
4742 'PR': '66.50.0.0/16',
4743 'PS': '188.161.0.0/16',
4744 'PT': '85.240.0.0/13',
4745 'PW': '202.124.224.0/20',
4746 'PY': '181.120.0.0/14',
4747 'QA': '37.210.0.0/15',
4748 'RE': '102.35.0.0/16',
4749 'RO': '79.112.0.0/13',
4750 'RS': '93.86.0.0/15',
4751 'RU': '5.136.0.0/13',
4752 'RW': '41.186.0.0/16',
4753 'SA': '188.48.0.0/13',
4754 'SB': '202.1.160.0/19',
4755 'SC': '154.192.0.0/11',
4756 'SD': '102.120.0.0/13',
4757 'SE': '78.64.0.0/12',
4758 'SG': '8.128.0.0/10',
4759 'SI': '188.196.0.0/14',
4760 'SK': '78.98.0.0/15',
4761 'SL': '102.143.0.0/17',
4762 'SM': '89.186.32.0/19',
4763 'SN': '41.82.0.0/15',
4764 'SO': '154.115.192.0/18',
4765 'SR': '186.179.128.0/17',
4766 'SS': '105.235.208.0/21',
4767 'ST': '197.159.160.0/19',
4768 'SV': '168.243.0.0/16',
4769 'SX': '190.102.0.0/20',
4770 'SY': '5.0.0.0/16',
4771 'SZ': '41.84.224.0/19',
4772 'TC': '65.255.48.0/20',
4773 'TD': '154.68.128.0/19',
4774 'TG': '196.168.0.0/14',
4775 'TH': '171.96.0.0/13',
4776 'TJ': '85.9.128.0/18',
4777 'TK': '27.96.24.0/21',
4778 'TL': '180.189.160.0/20',
4779 'TM': '95.85.96.0/19',
4780 'TN': '197.0.0.0/11',
4781 'TO': '175.176.144.0/21',
4782 'TR': '78.160.0.0/11',
4783 'TT': '186.44.0.0/15',
4784 'TV': '202.2.96.0/19',
4785 'TW': '120.96.0.0/11',
4786 'TZ': '156.156.0.0/14',
4787 'UA': '37.52.0.0/14',
4788 'UG': '102.80.0.0/13',
4789 'US': '6.0.0.0/8',
4790 'UY': '167.56.0.0/13',
4791 'UZ': '84.54.64.0/18',
4792 'VA': '212.77.0.0/19',
4793 'VC': '207.191.240.0/21',
4794 'VE': '186.88.0.0/13',
4795 'VG': '66.81.192.0/20',
4796 'VI': '146.226.0.0/16',
4797 'VN': '14.160.0.0/11',
4798 'VU': '202.80.32.0/20',
4799 'WF': '117.20.32.0/21',
4800 'WS': '202.4.32.0/19',
4801 'YE': '134.35.0.0/16',
4802 'YT': '41.242.116.0/22',
4803 'ZA': '41.0.0.0/11',
4804 'ZM': '102.144.0.0/13',
4805 'ZW': '102.177.192.0/18',
4806 }
4807
4808 @classmethod
4809 def random_ipv4(cls, code_or_block):
4810 if len(code_or_block) == 2:
4811 block = cls._country_ip_map.get(code_or_block.upper())
4812 if not block:
4813 return None
4814 else:
4815 block = code_or_block
4816 addr, preflen = block.split('/')
4817 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4818 addr_max = addr_min | (0xffffffff >> int(preflen))
4819 return str(socket.inet_ntoa(
4820 struct.pack('!L', random.randint(addr_min, addr_max))))
4821
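# A /32 block leaves no room for randomness, which makes for a deterministic
# sketch (the address is from the RFC 5737 documentation range):
# >>> GeoUtils.random_ipv4('192.0.2.1/32')
# '192.0.2.1'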
4822
4823 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4824 def __init__(self, proxies=None):
4825 # Set default handlers
4826 for type in ('http', 'https'):
4827 setattr(self, '%s_open' % type,
4828 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4829 meth(r, proxy, type))
4830 urllib.request.ProxyHandler.__init__(self, proxies)
4831
4832 def proxy_open(self, req, proxy, type):
4833 req_proxy = req.headers.get('Ytdl-request-proxy')
4834 if req_proxy is not None:
4835 proxy = req_proxy
4836 del req.headers['Ytdl-request-proxy']
4837
4838 if proxy == '__noproxy__':
4839 return None # No Proxy
4840 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4841 req.add_header('Ytdl-socks-proxy', proxy)
4842 # yt-dlp's http/https handlers wrap the socket with socks
4843 return None
4844 return urllib.request.ProxyHandler.proxy_open(
4845 self, req, proxy, type)
4846
4847
4848 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4849 # released into Public Domain
4850 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4851
4852 def long_to_bytes(n, blocksize=0):
4853 """long_to_bytes(n:long, blocksize:int) : string
4854 Convert a long integer to a byte string.
4855
4856 If optional blocksize is given and greater than zero, pad the front of the
4857 byte string with binary zeros so that the length is a multiple of
4858 blocksize.
4859 """
4860 # after much testing, this algorithm was deemed to be the fastest
4861 s = b''
4862 n = int(n)
4863 while n > 0:
4864 s = struct.pack('>I', n & 0xffffffff) + s
4865 n = n >> 32
4866 # strip off leading zeros
4867 for i in range(len(s)):
4868 if s[i] != b'\000'[0]:
4869 break
4870 else:
4871 # only happens when n == 0
4872 s = b'\000'
4873 i = 0
4874 s = s[i:]
4875 # add back some pad bytes. this could be done more efficiently w.r.t. the
4876 # de-padding being done above, but sigh...
4877 if blocksize > 0 and len(s) % blocksize:
4878 s = (blocksize - len(s) % blocksize) * b'\000' + s
4879 return s
4880
4881
4882 def bytes_to_long(s):
4883 """bytes_to_long(string) : long
4884 Convert a byte string to a long integer.
4885
4886 This is (essentially) the inverse of long_to_bytes().
4887 """
4888 acc = 0
4889 length = len(s)
4890 if length % 4:
4891 extra = (4 - length % 4)
4892 s = b'\000' * extra + s
4893 length = length + extra
4894 for i in range(0, length, 4):
4895 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4896 return acc
4897
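# Round-trip sketch (illustrative value):
# >>> long_to_bytes(65537, blocksize=4)
# b'\x00\x01\x00\x01'
# >>> bytes_to_long(b'\x01\x00\x01')
# 65537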
4898
4899 def ohdave_rsa_encrypt(data, exponent, modulus):
4900 '''
4901 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4902
4903 Input:
4904 data: data to encrypt, bytes-like object
4905 exponent, modulus: parameter e and N of RSA algorithm, both integer
4906 Output: hex string of encrypted data
4907
4908 Limitation: supports one block encryption only
4909 '''
4910
4911 payload = int(binascii.hexlify(data[::-1]), 16)
4912 encrypted = pow(payload, exponent, modulus)
4913 return '%x' % encrypted
4914
4915
4916 def pkcs1pad(data, length):
4917 """
4918 Padding input data with PKCS#1 scheme
4919
4920 @param {int[]} data input data
4921 @param {int} length target length
4922 @returns {int[]} padded data
4923 """
4924 if len(data) > length - 11:
4925 raise ValueError('Input data too long for PKCS#1 padding')
4926
4927 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # padding bytes must be non-zero
4928 return [0, 2] + pseudo_random + [0] + data
4929
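# Structure sketch (illustrative data): [0, 2] + non-zero padding + [0] + data
# >>> padded = pkcs1pad([1, 2, 3], 16)
# >>> padded[:2], padded[12], padded[-3:]
# ([0, 2], 0, [1, 2, 3])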
4930
4931 def _base_n_table(n, table):
4932 if not table and not n:
4933 raise ValueError('Either table or n must be specified')
4934 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4935
4936 if n and n != len(table):
4937 raise ValueError(f'base {n} exceeds table length {len(table)}')
4938 return table
4939
4940
4941 def encode_base_n(num, n=None, table=None):
4942 """Convert given int to a base-n string"""
4943 table = _base_n_table(n, table)
4944 if not num:
4945 return table[0]
4946
4947 result, base = '', len(table)
4948 while num:
4949 result = table[num % base] + result
4950 num = num // base
4951 return result
4952
4953
4954 def decode_base_n(string, n=None, table=None):
4955 """Convert given base-n string to int"""
4956 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4957 result, base = 0, len(table)
4958 for char in string:
4959 result = result * base + table[char]
4960 return result
4961
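# Round-trip sketch (illustrative values):
# >>> encode_base_n(255, 16)
# 'ff'
# >>> decode_base_n('ff', 16)
# 255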
4962
4963 def decode_base(value, digits):
4964 deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
4965 f'in a future version. Use {__name__}.decode_base_n instead')
4966 return decode_base_n(value, table=digits)
4967
4968
4969 def decode_packed_codes(code):
4970 mobj = re.search(PACKED_CODES_RE, code)
4971 obfuscated_code, base, count, symbols = mobj.groups()
4972 base = int(base)
4973 count = int(count)
4974 symbols = symbols.split('|')
4975 symbol_table = {}
4976
4977 while count:
4978 count -= 1
4979 base_n_count = encode_base_n(count, base)
4980 symbol_table[base_n_count] = symbols[count] or base_n_count
4981
4982 return re.sub(
4983 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4984 obfuscated_code)
4985
4986
4987 def caesar(s, alphabet, shift):
4988 if shift == 0:
4989 return s
4990 l = len(alphabet)
4991 return ''.join(
4992 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4993 for c in s)
4994
4995
4996 def rot47(s):
4997 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4998
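# rot47 is caesar over the 94 printable ASCII characters (inputs illustrative):
# >>> caesar('hello', 'abcdefghijklmnopqrstuvwxyz', 13)
# 'uryyb'
# >>> rot47('Hello')
# 'w6==@'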
4999
5000 def parse_m3u8_attributes(attrib):
5001 info = {}
5002 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5003 if val.startswith('"'):
5004 val = val[1:-1]
5005 info[key] = val
5006 return info
5007
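# Parsing sketch with an illustrative EXT-X-STREAM-INF attribute list:
# >>> parse_m3u8_attributes('BANDWIDTH=800000,CODECS="avc1.4d401e,mp4a.40.2"')
# {'BANDWIDTH': '800000', 'CODECS': 'avc1.4d401e,mp4a.40.2'}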
5008
5009 def urshift(val, n):
5010 return val >> n if val >= 0 else (val + 0x100000000) >> n
5011
5012
5013 # Based on png2str() written by @gdkchan and improved by @yokrysty
5014 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5015 def decode_png(png_data):
5016 # Reference: https://www.w3.org/TR/PNG/
5017 header = png_data[8:]
5018
5019 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5020 raise OSError('Not a valid PNG file.')
5021
5022 int_map = {1: '>B', 2: '>H', 4: '>I'}
5023 unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
5024
5025 chunks = []
5026
5027 while header:
5028 length = unpack_integer(header[:4])
5029 header = header[4:]
5030
5031 chunk_type = header[:4]
5032 header = header[4:]
5033
5034 chunk_data = header[:length]
5035 header = header[length:]
5036
5037 header = header[4:] # Skip CRC
5038
5039 chunks.append({
5040 'type': chunk_type,
5041 'length': length,
5042 'data': chunk_data
5043 })
5044
5045 ihdr = chunks[0]['data']
5046
5047 width = unpack_integer(ihdr[:4])
5048 height = unpack_integer(ihdr[4:8])
5049
5050 idat = b''
5051
5052 for chunk in chunks:
5053 if chunk['type'] == b'IDAT':
5054 idat += chunk['data']
5055
5056 if not idat:
5057 raise OSError('Unable to read PNG data.')
5058
5059 decompressed_data = bytearray(zlib.decompress(idat))
5060
5061 stride = width * 3  # 3 bytes per pixel; assumes 8-bit RGB
5062 pixels = []
5063
5064 def _get_pixel(idx):
5065 x = idx % stride
5066 y = idx // stride
5067 return pixels[y][x]
5068
5069 for y in range(height):
5070 basePos = y * (1 + stride)
5071 filter_type = decompressed_data[basePos]
5072
5073 current_row = []
5074
5075 pixels.append(current_row)
5076
5077 for x in range(stride):
5078 color = decompressed_data[1 + basePos + x]
5079 basex = y * stride + x
5080 left = 0
5081 up = 0
5082
5083 if x > 2:
5084 left = _get_pixel(basex - 3)
5085 if y > 0:
5086 up = _get_pixel(basex - stride)
5087
5088 if filter_type == 1: # Sub
5089 color = (color + left) & 0xff
5090 elif filter_type == 2: # Up
5091 color = (color + up) & 0xff
5092 elif filter_type == 3: # Average
5093 color = (color + ((left + up) >> 1)) & 0xff
5094 elif filter_type == 4: # Paeth
5095 a = left
5096 b = up
5097 c = 0
5098
5099 if x > 2 and y > 0:
5100 c = _get_pixel(basex - stride - 3)
5101
5102 p = a + b - c
5103
5104 pa = abs(p - a)
5105 pb = abs(p - b)
5106 pc = abs(p - c)
5107
5108 if pa <= pb and pa <= pc:
5109 color = (color + a) & 0xff
5110 elif pb <= pc:
5111 color = (color + b) & 0xff
5112 else:
5113 color = (color + c) & 0xff
5114
5115 current_row.append(color)
5116
5117 return width, height, pixels
5118
5119
5120 def write_xattr(path, key, value):
5121 # Windows: Write xattrs to NTFS Alternate Data Streams:
5122 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5123 if compat_os_name == 'nt':
5124 assert ':' not in key
5125 assert os.path.exists(path)
5126
5127 try:
5128 with open(f'{path}:{key}', 'wb') as f:
5129 f.write(value)
5130 except OSError as e:
5131 raise XAttrMetadataError(e.errno, e.strerror)
5132 return
5133
5134 # UNIX Method 1. Use the xattr/pyxattr modules
5135
5136 setxattr = None
5137 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5138 # Unicode arguments are not supported in pyxattr until version 0.5.0
5139 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5140 if version_tuple(xattr.__version__) >= (0, 5, 0):
5141 setxattr = xattr.set
5142 elif xattr:
5143 setxattr = xattr.setxattr
5144
5145 if setxattr:
5146 try:
5147 setxattr(path, key, value)
5148 except OSError as e:
5149 raise XAttrMetadataError(e.errno, e.strerror)
5150 return
5151
5152 # UNIX Method 2. Use setfattr/xattr executables
5153 exe = ('setfattr' if check_executable('setfattr', ['--version'])
5154 else 'xattr' if check_executable('xattr', ['-h']) else None)
5155 if not exe:
5156 raise XAttrUnavailableError(
5157 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5158 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5159
5160 value = value.decode()
5161 try:
5162 _, stderr, returncode = Popen.run(
5163 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5164 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5165 except OSError as e:
5166 raise XAttrMetadataError(e.errno, e.strerror)
5167 if returncode:
5168 raise XAttrMetadataError(returncode, stderr)
5169
5170
5171 def random_birthday(year_field, month_field, day_field):
5172 start_date = datetime.date(1950, 1, 1)
5173 end_date = datetime.date(1995, 12, 31)
5174 offset = random.randint(0, (end_date - start_date).days)
5175 random_date = start_date + datetime.timedelta(offset)
5176 return {
5177 year_field: str(random_date.year),
5178 month_field: str(random_date.month),
5179 day_field: str(random_date.day),
5180 }
5181
5182
5183 # Templates for internet shortcut files, which are plain text files.
5184 DOT_URL_LINK_TEMPLATE = '''\
5185 [InternetShortcut]
5186 URL=%(url)s
5187 '''
5188
5189 DOT_WEBLOC_LINK_TEMPLATE = '''\
5190 <?xml version="1.0" encoding="UTF-8"?>
5191 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5192 <plist version="1.0">
5193 <dict>
5194 \t<key>URL</key>
5195 \t<string>%(url)s</string>
5196 </dict>
5197 </plist>
5198 '''
5199
5200 DOT_DESKTOP_LINK_TEMPLATE = '''\
5201 [Desktop Entry]
5202 Encoding=UTF-8
5203 Name=%(filename)s
5204 Type=Link
5205 URL=%(url)s
5206 Icon=text-html
5207 '''
5208
5209 LINK_TEMPLATES = {
5210 'url': DOT_URL_LINK_TEMPLATE,
5211 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5212 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5213 }
5214
5215
5216 def iri_to_uri(iri):
5217 """
5218 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5219
5220 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters using UTF-8, skipping those that are already escaped, and leaves the rest of the URI intact.
5221 """
5222
5223 iri_parts = urllib.parse.urlparse(iri)
5224
5225 if '[' in iri_parts.netloc:
5226 raise ValueError('IPv6 URIs are not yet supported.')
5227 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5228
5229 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5230
5231 net_location = ''
5232 if iri_parts.username:
5233 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5234 if iri_parts.password is not None:
5235 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5236 net_location += '@'
5237
5238 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5239 # The 'idna' encoding produces ASCII text.
5240 if iri_parts.port is not None and iri_parts.port != 80:
5241 net_location += ':' + str(iri_parts.port)
5242
5243 return urllib.parse.urlunparse(
5244 (iri_parts.scheme,
5245 net_location,
5246
5247 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5248
5249 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5250 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5251
5252 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5253 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5254
5255 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5256
5257 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5258
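# Conversion sketch (illustrative IRI):
# >>> iri_to_uri('https://example.com/héllo?q=café')
# 'https://example.com/h%C3%A9llo?q=caf%C3%A9'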
5259
5260 def to_high_limit_path(path):
5261 if sys.platform in ['win32', 'cygwin']:
5262 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5263 return '\\\\?\\' + os.path.abspath(path)
5264
5265 return path
5266
5267
5268 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5269 val = traverse_obj(obj, *variadic(field))
5270 if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5271 return default
5272 return template % func(val)
5273
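# Formatting sketch (illustrative fields):
# >>> format_field({'height': 1080}, 'height', '%sp')
# '1080p'
# >>> format_field({'height': None}, 'height', '%sp', default='unknown')
# 'unknown'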
5274
5275 def clean_podcast_url(url):
5276 return re.sub(r'''(?x)
5277 (?:
5278 (?:
5279 chtbl\.com/track|
5280 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5281 play\.podtrac\.com
5282 )/[^/]+|
5283 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5284 flex\.acast\.com|
5285 pd(?:
5286 cn\.co| # https://podcorn.com/analytics-prefix/
5287 st\.fm # https://podsights.com/docs/
5288 )/e
5289 )/''', '', url)
5290
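# E.g. (hypothetical tracking prefix):
# >>> clean_podcast_url('https://chtbl.com/track/12345/example.com/episode.mp3')
# 'https://example.com/episode.mp3'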
5291
5292 _HEX_TABLE = '0123456789abcdef'
5293
5294
5295 def random_uuidv4():
5296 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5297
5298
5299 def make_dir(path, to_screen=None):
5300 try:
5301 dn = os.path.dirname(path)
5302 if dn and not os.path.exists(dn):
5303 os.makedirs(dn)
5304 return True
5305 except OSError as err:
5306 if callable(to_screen):
5307 to_screen('unable to create directory ' + error_to_compat_str(err))
5308 return False
5309
5310
5311 def get_executable_path():
5312 from .update import _get_variant_and_executable_path
5313
5314 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5315
5316
5317 def load_plugins(name, suffix, namespace):
5318 classes = {}
5319 with contextlib.suppress(FileNotFoundError):
5320 plugins_spec = importlib.util.spec_from_file_location(
5321 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5322 plugins = importlib.util.module_from_spec(plugins_spec)
5323 sys.modules[plugins_spec.name] = plugins
5324 plugins_spec.loader.exec_module(plugins)
5325 for name in dir(plugins):
5326 if name in namespace:
5327 continue
5328 if not name.endswith(suffix):
5329 continue
5330 klass = getattr(plugins, name)
5331 classes[name] = namespace[name] = klass
5332 return classes
5333
5334
5335 def traverse_obj(
5336 obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
5337 casesense=True, is_user_input=False, traverse_string=False):
5338 """
5339 Safely traverse nested `dict`s and `Sequence`s
5340
5341 >>> obj = [{}, {"key": "value"}]
5342 >>> traverse_obj(obj, (1, "key"))
5343 "value"
5344
5345 Each of the provided `paths` is tested and the first producing a valid result will be returned.
5346 The next path will also be tested if the path branched but no results could be found.
5347 Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
5348 A value of None is treated as the absence of a value.
5349
5350 The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
5351
5352 The keys in the path can be one of:
5353 - `None`: Return the current object.
5354 - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
5355 - `slice`: Branch out and return all values in `obj[key]`.
5356 - `Ellipsis`: Branch out and return a list of all values.
5357 - `tuple`/`list`: Branch out and return a list of all matching values.
5358 Read as: `[traverse_obj(obj, branch) for branch in branches]`.
5359 - `function`: Branch out and return values filtered by the function.
5360 Read as: `[value for key, value in obj if function(key, value)]`.
5361 For `Sequence`s, `key` is the index of the value.
5362 - `dict`: Transform the current object and return a matching dict.
5363 Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
5364
5365 `tuple`, `list`, and `dict` all support nested paths and branches.
5366
5367 @param paths Paths to traverse by.
5368 @param default Value to return if the paths do not match.
5369 @param expected_type If a `type`, only accept final values of this type.
5370 If any other callable, try to call the function on each result.
5371 @param get_all If `False`, return the first matching result, otherwise all matching ones.
5372 @param casesense If `False`, consider string dictionary keys as case insensitive.
5373
5374 The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
5375
5376 @param is_user_input Whether the keys are generated from user input.
5377 If `True` strings get converted to `int`/`slice` if needed.
5378 @param traverse_string Whether to traverse into objects as strings.
5379 If `True`, any non-compatible object will first be
5380 converted into a string and then traversed into.
5381
5382
5383 @returns The result of the object traversal.
5384 If successful, `get_all=True`, and the path branches at least once,
5385 then a list of results is returned instead.
5386 A list is always returned if the last path branches and no `default` is given.
5387 """
5388 is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
5389 casefold = lambda k: k.casefold() if isinstance(k, str) else k
5390
5391 if isinstance(expected_type, type):
5392 type_test = lambda val: val if isinstance(val, expected_type) else None
5393 else:
5394 type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
5395
5396 def apply_key(key, obj):
5397 if obj is None:
5398 return
5399
5400 elif key is None:
5401 yield obj
5402
5403 elif isinstance(key, (list, tuple)):
5404 for branch in key:
5405 _, result = apply_path(obj, branch)
5406 yield from result
5407
5408 elif key is ...:
5409 if isinstance(obj, collections.abc.Mapping):
5410 yield from obj.values()
5411 elif is_sequence(obj):
5412 yield from obj
5413 elif isinstance(obj, re.Match):
5414 yield from obj.groups()
5415 elif traverse_string:
5416 yield from str(obj)
5417
5418 elif callable(key):
5419 if is_sequence(obj):
5420 iter_obj = enumerate(obj)
5421 elif isinstance(obj, collections.abc.Mapping):
5422 iter_obj = obj.items()
5423 elif isinstance(obj, re.Match):
5424 iter_obj = enumerate((obj.group(), *obj.groups()))
5425 elif traverse_string:
5426 iter_obj = enumerate(str(obj))
5427 else:
5428 return
5429 yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))
5430
5431 elif isinstance(key, dict):
5432 iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
5433 yield {k: v if v is not None else default for k, v in iter_obj
5434 if v is not None or default is not NO_DEFAULT}
5435
5436 elif isinstance(obj, collections.abc.Mapping):
5437 yield (obj.get(key) if casesense or (key in obj)
5438 else next((v for k, v in obj.items() if casefold(k) == key), None))
5439
5440 elif isinstance(obj, re.Match):
5441 if isinstance(key, int) or casesense:
5442 with contextlib.suppress(IndexError):
5443 yield obj.group(key)
5444 return
5445
5446 if not isinstance(key, str):
5447 return
5448
5449 yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
5450
5451 else:
5452 if is_user_input:
5453 key = (int_or_none(key) if ':' not in key
5454 else slice(*map(int_or_none, key.split(':'))))
5455
5456 if not isinstance(key, (int, slice)):
5457 return
5458
5459 if not is_sequence(obj):
5460 if not traverse_string:
5461 return
5462 obj = str(obj)
5463
5464 with contextlib.suppress(IndexError):
5465 yield obj[key]
5466
5467 def apply_path(start_obj, path):
5468 objs = (start_obj,)
5469 has_branched = False
5470
5471 for key in variadic(path):
5472 if is_user_input and key == ':':
5473 key = ...
5474
5475 if not casesense and isinstance(key, str):
5476 key = key.casefold()
5477
5478 if key is ... or isinstance(key, (list, tuple)) or callable(key):
5479 has_branched = True
5480
5481 key_func = functools.partial(apply_key, key)
5482 objs = itertools.chain.from_iterable(map(key_func, objs))
5483
5484 return has_branched, objs
5485
5486 def _traverse_obj(obj, path, use_list=True):
5487 has_branched, results = apply_path(obj, path)
5488 results = LazyList(x for x in map(type_test, results) if x is not None)
5489
5490 if get_all and has_branched:
5491 return results.exhaust() if results or use_list else None
5492
5493 return results[0] if results else None
5494
5495 for index, path in enumerate(paths, 1):
5496 use_list = default is NO_DEFAULT and index == len(paths)
5497 result = _traverse_obj(obj, path, use_list)
5498 if result is not None:
5499 return result
5500
5501 return None if default is NO_DEFAULT else default
5502
5503
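# A minimal usage sketch of `traverse_obj` over extractor-style JSON. The data
# and the `_example_*` names here and below are illustrative only, not part of
# the module API:
def _example_traverse_obj():
    data = {'playlist': [{'video': {'id': 'a'}}, {'video': {'id': 'b', 'views': '10'}}]}
    # `...` branches over every playlist entry; entries lacking a key yield nothing
    assert traverse_obj(data, ('playlist', ..., 'video', 'id')) == ['a', 'b']
    # a callable `expected_type` is applied to each result; `get_all=False` takes the first hit
    assert traverse_obj(data, ('playlist', ..., 'video', 'views'),
                        expected_type=int_or_none, get_all=False) == 10
    # a `dict` key transforms the current object into a matching dict
    assert traverse_obj(data, ('playlist', 0, {'video_id': ('video', 'id')})) == {'video_id': 'a'}

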
5504 def traverse_dict(dictn, keys, casesense=True):
5505 deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
5506 f'in a future version. Use "{__name__}.traverse_obj" instead')
5507 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5508
5509
5510 def get_first(obj, keys, **kwargs):
5511 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5512
5513
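# `get_first` is shorthand for branching over a top-level list of candidate
# dicts and returning the first non-None hit (hypothetical data):
def _example_get_first():
    assert get_first([{'id': None}, {'id': 'x1'}], 'id') == 'x1'

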
5514 def time_seconds(**kwargs):
5515 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5516 return t.timestamp()
5517
5518
5519 # Create a JSON Web Signature (JWS) with the HS256 algorithm;
5520 # the resulting format is JWS Compact Serialization.
5521 # Implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5522 # and JWS https://www.rfc-editor.org/rfc/rfc7515.html
5523 def jwt_encode_hs256(payload_data, key, headers={}):
5524 header_data = {
5525 'alg': 'HS256',
5526 'typ': 'JWT',
5527 }
5528 if headers:
5529 header_data.update(headers)
5530 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5531 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5532 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5533 signature_b64 = base64.b64encode(h.digest())
5534 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5535 return token
5536
5537
5538 # Can be extended in the future to verify the signature, parse the header, and return the algorithm used if it's not HS256
5539 def jwt_decode_hs256(jwt):
5540 header_b64, payload_b64, signature_b64 = jwt.split('.')
5541 # add trailing ='s that may have been stripped, superfluous ='s are ignored
5542 payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
5543 return payload_data
5544
5545
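# A hypothetical roundtrip of the two JWT helpers above. Note the encoder emits
# standard (non-URL-safe, padded) base64, which strict RFC 7515 verifiers may
# reject; the decoder's lenient urlsafe decode accepts either alphabet:
def _example_jwt_roundtrip():
    token = jwt_encode_hs256({'sub': 'user'}, 'secret-key')  # returns bytes
    assert jwt_decode_hs256(token.decode()) == {'sub': 'user'}

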
5546 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5547
5548
5549 @functools.cache
5550 def supports_terminal_sequences(stream):
5551 if compat_os_name == 'nt':
5552 if not WINDOWS_VT_MODE:
5553 return False
5554 elif not os.getenv('TERM'):
5555 return False
5556 try:
5557 return stream.isatty()
5558 except BaseException:
5559 return False
5560
5561
5562 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5563 if get_windows_version() < (10, 0, 10586):
5564 return
5565 global WINDOWS_VT_MODE
5566 try:
5567 Popen.run('', shell=True)
5568 except Exception:
5569 return
5570
5571 WINDOWS_VT_MODE = True
5572 supports_terminal_sequences.cache_clear()
5573
5574
5575 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5576
5577
5578 def remove_terminal_sequences(string):
5579 return _terminal_sequences_re.sub('', string)
5580
5581
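# The regex above matches SGR color/style sequences such as "\033[0;31m" (a sketch):
def _example_remove_terminal_sequences():
    assert remove_terminal_sequences('\033[0;31mred\033[0m text') == 'red text'

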
5582 def number_of_digits(number):
5583 return len('%d' % number)
5584
5585
5586 def join_nonempty(*values, delim='-', from_dict=None):
5587 if from_dict is not None:
5588 values = (traverse_obj(from_dict, variadic(v)) for v in values)
5589 return delim.join(map(str, filter(None, values)))
5590
5591
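# `join_nonempty` drops falsy values entirely, which makes it handy for
# building composite IDs (hypothetical values):
def _example_join_nonempty():
    assert join_nonempty('abc', None, 42, '', delim='-') == 'abc-42'
    # with `from_dict`, each value is first looked up via traverse_obj
    assert join_nonempty('id', 'season', from_dict={'id': 'x1', 'season': 2}) == 'x1-2'

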
5592 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5593 """
5594 Find the largest format dimensions in terms of video width and, for each thumbnail:
5595 * Modify the URL: Match the width with the provided regex and replace it with the largest format width
5596 * Update dimensions
5597
5598 This function is useful with video services that scale the provided thumbnails on demand
5599 """
5600 _keys = ('width', 'height')
5601 max_dimensions = max(
5602 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5603 default=(0, 0))
5604 if not max_dimensions[0]:
5605 return thumbnails
5606 return [
5607 merge_dicts(
5608 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5609 dict(zip(_keys, max_dimensions)), thumbnail)
5610 for thumbnail in thumbnails
5611 ]
5612
5613
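# A sketch with a hypothetical CDN URL scheme: the formats peak at width 1920,
# so the thumbnail's width token is rewritten and its dimensions updated:
def _example_scale_thumbnails():
    formats = [{'width': 1280, 'height': 720}, {'width': 1920, 'height': 1080}]
    thumbs = [{'url': 'https://cdn.example.com/w320/thumb.jpg'}]
    assert scale_thumbnails_to_max_format_width(formats, thumbs, r'(?<=/w)\d+') == [
        {'url': 'https://cdn.example.com/w1920/thumb.jpg', 'width': 1920, 'height': 1080}]

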
5614 def parse_http_range(range):
5615 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5616 if not range:
5617 return None, None, None
5618 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5619 if not crg:
5620 return None, None, None
5621 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5622
5623
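# `parse_http_range` accepts both the request form ("bytes=0-499") and the
# response form ("bytes 0-499/1234"); absent parts come back as None:
def _example_parse_http_range():
    assert parse_http_range('bytes=10-') == (10, None, None)
    assert parse_http_range('bytes 0-499/1234') == (0, 499, 1234)

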
5624 def read_stdin(what):
5625 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5626 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5627 return sys.stdin
5628
5629
5630 def determine_file_encoding(data):
5631 """
5632 Detect the text encoding used
5633 @returns (encoding, bytes to skip)
5634 """
5635
5636 # BOM marks are given priority over declarations
5637 for bom, enc in BOMS:
5638 if data.startswith(bom):
5639 return enc, len(bom)
5640
5641 # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5642 # We ignore the endianness to get a good enough match
5643 data = data.replace(b'\0', b'')
5644 mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5645 return mobj.group(1).decode() if mobj else None, 0
5646
5647
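# A sketch with hypothetical file contents, relying on the module-level BOMS
# table defined earlier in this file:
def _example_determine_file_encoding():
    assert determine_file_encoding(b'\xef\xbb\xbf--flag') == ('utf-8', 3)
    assert determine_file_encoding(b'# coding: shift-jis\n--flag') == ('shift-jis', 0)

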
5648 class Config:
5649 own_args = None
5650 parsed_args = None
5651 filename = None
5652 __initialized = False
5653
5654 def __init__(self, parser, label=None):
5655 self.parser, self.label = parser, label
5656 self._loaded_paths, self.configs = set(), []
5657
5658 def init(self, args=None, filename=None):
5659 assert not self.__initialized
5660 self.own_args, self.filename = args, filename
5661 return self.load_configs()
5662
5663 def load_configs(self):
5664 directory = ''
5665 if self.filename:
5666 location = os.path.realpath(self.filename)
5667 directory = os.path.dirname(location)
5668 if location in self._loaded_paths:
5669 return False
5670 self._loaded_paths.add(location)
5671
5672 self.__initialized = True
5673 opts, _ = self.parser.parse_known_args(self.own_args)
5674 self.parsed_args = self.own_args
5675 for location in opts.config_locations or []:
5676 if location == '-':
5677 if location in self._loaded_paths:
5678 continue
5679 self._loaded_paths.add(location)
5680 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5681 continue
5682 location = os.path.join(directory, expand_path(location))
5683 if os.path.isdir(location):
5684 location = os.path.join(location, 'yt-dlp.conf')
5685 if not os.path.exists(location):
5686 self.parser.error(f'config location {location} does not exist')
5687 self.append_config(self.read_file(location), location)
5688 return True
5689
5690 def __str__(self):
5691 label = join_nonempty(
5692 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5693 delim=' ')
5694 return join_nonempty(
5695 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5696 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5697 delim='\n')
5698
5699 @staticmethod
5700 def read_file(filename, default=[]):
5701 try:
5702 optionf = open(filename, 'rb')
5703 except OSError:
5704 return default # silently skip if file is not present
5705 try:
5706 enc, skip = determine_file_encoding(optionf.read(512))
5707 optionf.seek(skip, io.SEEK_SET)
5708 except OSError:
5709 enc = None # silently skip read errors
5710 try:
5711 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5712 contents = optionf.read().decode(enc or preferredencoding())
5713 res = shlex.split(contents, comments=True)
5714 except Exception as err:
5715 raise ValueError(f'Unable to parse "{filename}": {err}')
5716 finally:
5717 optionf.close()
5718 return res
5719
5720 @staticmethod
5721 def hide_login_info(opts):
5722 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5723 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5724
5725 def _scrub_eq(o):
5726 m = eqre.match(o)
5727 if m:
5728 return m.group('key') + '=PRIVATE'
5729 else:
5730 return o
5731
5732 opts = list(map(_scrub_eq, opts))
5733 for idx, opt in enumerate(opts):
5734 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5735 opts[idx + 1] = 'PRIVATE'
5736 return opts
5737
5738 def append_config(self, *args, label=None):
5739 config = type(self)(self.parser, label)
5740 config._loaded_paths = self._loaded_paths
5741 if config.init(*args):
5742 self.configs.append(config)
5743
5744 @property
5745 def all_args(self):
5746 for config in reversed(self.configs):
5747 yield from config.all_args
5748 yield from self.parsed_args or []
5749
5750 def parse_known_args(self, **kwargs):
5751 return self.parser.parse_known_args(self.all_args, **kwargs)
5752
5753 def parse_args(self):
5754 return self.parser.parse_args(self.all_args)
5755
5756
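# `Config.hide_login_info` scrubs credentials in both the separate-argument and
# the "--option=value" forms (a sketch):
def _example_hide_login_info():
    assert Config.hide_login_info(['-u', 'me', '--password=hunter2', '-v']) == ['-u', 'PRIVATE', '--password=PRIVATE', '-v']

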
5757 class WebSocketsWrapper:
5758 """Wraps websockets module to use in non-async scopes"""
5759 pool = None
5760
5761 def __init__(self, url, headers=None, connect=True):
5762 self.loop = asyncio.new_event_loop()
5763 # XXX: "loop" is deprecated
5764 self.conn = websockets.connect(
5765 url, extra_headers=headers, ping_interval=None,
5766 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5767 if connect:
5768 self.__enter__()
5769 atexit.register(self.__exit__, None, None, None)
5770
5771 def __enter__(self):
5772 if not self.pool:
5773 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5774 return self
5775
5776 def send(self, *args):
5777 self.run_with_loop(self.pool.send(*args), self.loop)
5778
5779 def recv(self, *args):
5780 return self.run_with_loop(self.pool.recv(*args), self.loop)
5781
5782 def __exit__(self, type, value, traceback):
5783 try:
5784 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5785 finally:
5786 self._cancel_all_tasks(self.loop)  # must run before closing: it uses run_until_complete
5787 self.loop.close()
5788
5789 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5790 # for contributors: If any new library that uses asyncio needs to be run in a non-async scope, move these functions out of this class
5791 @staticmethod
5792 def run_with_loop(main, loop):
5793 if not asyncio.iscoroutine(main):
5794 raise ValueError(f'a coroutine was expected, got {main!r}')
5795
5796 try:
5797 return loop.run_until_complete(main)
5798 finally:
5799 loop.run_until_complete(loop.shutdown_asyncgens())
5800 if hasattr(loop, 'shutdown_default_executor'):
5801 loop.run_until_complete(loop.shutdown_default_executor())
5802
5803 @staticmethod
5804 def _cancel_all_tasks(loop):
5805 to_cancel = asyncio.all_tasks(loop)
5806
5807 if not to_cancel:
5808 return
5809
5810 for task in to_cancel:
5811 task.cancel()
5812
5813 # XXX: "loop" is removed in python 3.10+
5814 loop.run_until_complete(
5815 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5816
5817 for task in to_cancel:
5818 if task.cancelled():
5819 continue
5820 if task.exception() is not None:
5821 loop.call_exception_handler({
5822 'message': 'unhandled exception during asyncio.run() shutdown',
5823 'exception': task.exception(),
5824 'task': task,
5825 })
5826
5827
5828 def merge_headers(*dicts):
5829 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5830 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5831
5832
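# Keys are title-cased, so "user-agent" and "User-Agent" collide and the later
# dict wins (a sketch):
def _example_merge_headers():
    assert merge_headers({'user-agent': 'a', 'Accept': '*/*'}, {'User-Agent': 'b'}) == {'User-Agent': 'b', 'Accept': '*/*'}

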
5833 def cached_method(f):
5834 """Cache a method"""
5835 signature = inspect.signature(f)
5836
5837 @functools.wraps(f)
5838 def wrapper(self, *args, **kwargs):
5839 bound_args = signature.bind(self, *args, **kwargs)
5840 bound_args.apply_defaults()
5841 key = tuple(bound_args.arguments.values())[1:]
5842
5843 cache = vars(self).setdefault('__cached_method__cache', {}).setdefault(f.__name__, {})
5844 if key not in cache:
5845 cache[key] = f(self, *args, **kwargs)
5846 return cache[key]
5847 return wrapper
5848
5849
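# `cached_method` memoizes per instance, keyed on the bound arguments minus
# `self` (a sketch; the counter class is hypothetical):
class _ExampleCounter:
    calls = 0

    @cached_method
    def square(self, x):
        self.calls += 1
        return x * x


def _example_cached_method():
    c = _ExampleCounter()
    assert c.square(3) == 9 and c.square(3) == 9 and c.calls == 1

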
5850 class classproperty:
5851 """property access for class methods"""
5852
5853 def __init__(self, func):
5854 functools.update_wrapper(self, func)
5855 self.func = func
5856
5857 def __get__(self, _, cls):
5858 return self.func(cls)
5859
5860
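# `classproperty` evaluates on attribute access and receives the class itself
# (a sketch; the example class is hypothetical):
class _ExampleCls:
    @classproperty
    def name(cls):
        return cls.__name__


def _example_classproperty():
    assert _ExampleCls.name == '_ExampleCls'

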
5861 class Namespace(types.SimpleNamespace):
5862 """Immutable namespace"""
5863
5864 def __iter__(self):
5865 return iter(self.__dict__.values())
5866
5867 @property
5868 def items_(self):
5869 return self.__dict__.items()
5870
5871
5872 MEDIA_EXTENSIONS = Namespace(
5873 common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5874 video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5875 common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5876 audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
5877 thumbnails=('jpg', 'png', 'webp'),
5878 storyboards=('mhtml', ),
5879 subtitles=('srt', 'vtt', 'ass', 'lrc'),
5880 manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5881 )
5882 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5883 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5884
5885 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5886
5887
5888 class RetryManager:
5889 """Usage:
5890 for retry in RetryManager(...):
5891 try:
5892 ...
5893 except SomeException as err:
5894 retry.error = err
5895 continue
5896 """
5897 attempt, _error = 0, None
5898
5899 def __init__(self, _retries, _error_callback, **kwargs):
5900 self.retries = _retries or 0
5901 self.error_callback = functools.partial(_error_callback, **kwargs)
5902
5903 def _should_retry(self):
5904 return self._error is not NO_DEFAULT and self.attempt <= self.retries
5905
5906 @property
5907 def error(self):
5908 if self._error is NO_DEFAULT:
5909 return None
5910 return self._error
5911
5912 @error.setter
5913 def error(self, value):
5914 self._error = value
5915
5916 def __iter__(self):
5917 while self._should_retry():
5918 self.error = NO_DEFAULT
5919 self.attempt += 1
5920 yield self
5921 if self.error:
5922 self.error_callback(self.error, self.attempt, self.retries)
5923
5924 @staticmethod
5925 def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5926 """Utility function for reporting retries"""
5927 if count > retries:
5928 if error:
5929 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5930 raise e
5931
5932 if not count:
5933 return warn(e)
5934 elif isinstance(e, ExtractorError):
5935 e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
5936 warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5937
5938 delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5939 if delay:
5940 info(f'Sleeping {delay:.2f} seconds ...')
5941 time.sleep(delay)
5942
5943
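# A sketch of driving RetryManager by hand; the error callback only fires when
# all attempts are exhausted (names here are hypothetical):
def _example_retry_manager():
    def give_up(err, count, retries):
        raise err

    attempts = []
    for retry in RetryManager(2, give_up):
        try:
            attempts.append(retry.attempt)
            if retry.attempt < 2:
                raise OSError('flaky')
        except OSError as err:
            retry.error = err
    assert attempts == [1, 2]  # failed once, then succeeded; give_up never fired

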
5944 def make_archive_id(ie, video_id):
5945 ie_key = ie if isinstance(ie, str) else ie.ie_key()
5946 return f'{ie_key.lower()} {video_id}'
5947
5948
5949 def truncate_string(s, left, right=0):
5950 assert left > 3 and right >= 0
5951 if s is None or len(s) <= left + right:
5952 return s
5953 return f'{s[:left - 3]}...{s[-right:] if right else ""}'  # NB: s[-0:] would return the whole string
5954
5955
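# With the `right == 0` guard above, truncation keeps exactly `left` (plus
# `right`) characters (a sketch):
def _example_truncate_string():
    assert truncate_string('abcdefghij', 5) == 'ab...'
    assert truncate_string('abcdefghij', 5, 2) == 'ab...ij'

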
5956 def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
5957 assert 'all' in alias_dict, '"all" alias is required'
5958 requested = list(start or [])
5959 for val in options:
5960 discard = val.startswith('-')
5961 if discard:
5962 val = val[1:]
5963
5964 if val in alias_dict:
5965 val = alias_dict[val] if not discard else [
5966 i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
5967 # NB: Do not allow regex in aliases for performance
5968 requested = orderedSet_from_options(val, alias_dict, start=requested)
5969 continue
5970
5971 current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
5972 else [val] if val in alias_dict['all'] else None)
5973 if current is None:
5974 raise ValueError(val)
5975
5976 if discard:
5977 for item in current:
5978 while item in requested:
5979 requested.remove(item)
5980 else:
5981 requested.extend(current)
5982
5983 return orderedSet(requested)
5984
5985
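# A sketch with a hypothetical alias table; the mandatory 'all' alias doubles
# as the universe that "-name" discards and regexes are resolved against:
def _example_ordered_set_from_options():
    aliases = {'all': ['a', 'b', 'c']}
    assert orderedSet_from_options(['all', '-b'], aliases) == ['a', 'c']

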
5986 # Deprecated
5987 has_certifi = bool(certifi)
5988 has_websockets = bool(websockets)