yt_dlp/utils.py

   1 import asyncio
   2 import atexit
   3 import base64
   4 import binascii
   5 import calendar
   6 import codecs
   7 import collections
   8 import collections.abc
   9 import contextlib
  10 import datetime
  11 import email.header
  12 import email.utils
  13 import errno
  14 import gzip
  15 import hashlib
  16 import hmac
  17 import html.entities
  18 import html.parser
  19 import http.client
  20 import http.cookiejar
  21 import importlib.util
  22 import inspect
  23 import io
  24 import itertools
  25 import json
  26 import locale
  27 import math
  28 import mimetypes
  29 import operator
  30 import os
  31 import platform
  32 import random
  33 import re
  34 import shlex
  35 import socket
  36 import ssl
  37 import struct
  38 import subprocess
  39 import sys
  40 import tempfile
  41 import time
  42 import traceback
  43 import types
  44 import unicodedata
  45 import urllib.error
  46 import urllib.parse
  47 import urllib.request
  48 import xml.etree.ElementTree
  49 import zlib
  50
  51 from .compat import functools  # isort: split
  52 from .compat import (
  53     compat_etree_fromstring,
  54     compat_expanduser,
  55     compat_HTMLParseError,
  56     compat_os_name,
  57     compat_shlex_quote,
  58 )
  59 from .dependencies import brotli, certifi, websockets, xattr
  60 from .socks import ProxyType, sockssocket
  61
  62
  63 def register_socks_protocols():
  64     # "Register" SOCKS protocols
  65     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  66     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  67     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  68         if scheme not in urllib.parse.uses_netloc:
  69             urllib.parse.uses_netloc.append(scheme)
  70
  71
# Type of a compiled regular expression object, obtained by compiling an
# empty pattern (this is not clearly defined otherwise)
compiled_regex_type = type(re.compile(''))
  74
  75
  76 def random_user_agent():
  77     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  78     _CHROME_VERSIONS = (
  79         '90.0.4430.212',
  80         '90.0.4430.24',
  81         '90.0.4430.70',
  82         '90.0.4430.72',
  83         '90.0.4430.85',
  84         '90.0.4430.93',
  85         '91.0.4472.101',
  86         '91.0.4472.106',
  87         '91.0.4472.114',
  88         '91.0.4472.124',
  89         '91.0.4472.164',
  90         '91.0.4472.19',
  91         '91.0.4472.77',
  92         '92.0.4515.107',
  93         '92.0.4515.115',
  94         '92.0.4515.131',
  95         '92.0.4515.159',
  96         '92.0.4515.43',
  97         '93.0.4556.0',
  98         '93.0.4577.15',
  99         '93.0.4577.63',
 100         '93.0.4577.82',
 101         '94.0.4606.41',
 102         '94.0.4606.54',
 103         '94.0.4606.61',
 104         '94.0.4606.71',
 105         '94.0.4606.81',
 106         '94.0.4606.85',
 107         '95.0.4638.17',
 108         '95.0.4638.50',
 109         '95.0.4638.54',
 110         '95.0.4638.69',
 111         '95.0.4638.74',
 112         '96.0.4664.18',
 113         '96.0.4664.45',
 114         '96.0.4664.55',
 115         '96.0.4664.93',
 116         '97.0.4692.20',
 117     )
 118     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 119
 120
# Content encodings considered supported; 'br' is appended only when the
# optional brotli dependency is available (truthy)
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

# Default HTTP headers.
# NOTE: random_user_agent() is called here, so the User-Agent is picked once,
# when this module is imported
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as "no default given" for parameters where None is
# itself a meaningful value; compare with `is`
NO_DEFAULT = object()
# Function that returns its argument unchanged
IDENTITY = lambda x: x
 142
# Full English month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names in calendar order, keyed by language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
# Timezone abbreviation -> offset (presumably whole hours relative to UTC — confirm at use sites)
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# needed for sanitizing filenames in restricted mode
# Maps each accented character of the first string to its ASCII replacement;
# the multi-character replacements ('AE', 'OE', 'TH', 'ss', ...) are passed as
# one-element lists so that itertools.chain yields them as single items
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 173
# strptime-style format strings that are not ambiguous between day-first and
# month-first notation; shared by both lists below
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Common formats plus numeric formats read as day before month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

# Common formats plus numeric formats read as month before day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Pattern with four groups matching the tail of a "}('...',N,N,'...'.split('|')"
# call (presumably packed JavaScript — confirm at use sites)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the JSON object or
# array inside it is captured in the named group `json_ld`
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

# Unsigned decimal number with an optional fractional part
NUMBER_RE = r'\d+(?:\.\d+)?'
 243
 244
 245 @functools.cache
 246 def preferredencoding():
 247     """Get preferred encoding.
 248
 249     Returns the best encoding scheme for the system, based on
 250     locale.getpreferredencoding() and some further tweaks.
 251     """
 252     try:
 253         pref = locale.getpreferredencoding()
 254         'TEST'.encode(pref)
 255     except Exception:
 256         pref = 'UTF-8'
 257
 258     return pref
 259
 260
 261 def write_json_file(obj, fn):
 262     """ Encode obj as JSON and write it to fn, atomically if possible """
 263
 264     tf = tempfile.NamedTemporaryFile(
 265         prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 266         suffix='.tmp', delete=False, mode='w', encoding='utf-8')
 267
 268     try:
 269         with tf:
 270             json.dump(obj, tf, ensure_ascii=False)
 271         if sys.platform == 'win32':
 272             # Need to remove existing file on Windows, else os.rename raises
 273             # WindowsError or FileExistsError.
 274             with contextlib.suppress(OSError):
 275                 os.unlink(fn)
 276         with contextlib.suppress(OSError):
 277             mask = os.umask(0)
 278             os.umask(mask)
 279             os.chmod(tf.name, 0o666 & ~mask)
 280         os.rename(tf.name, fn)
 281     except Exception:
 282         with contextlib.suppress(OSError):
 283             os.remove(tf.name)
 284         raise
 285
 286
 287 def find_xpath_attr(node, xpath, key, val=None):
 288     """ Find the xpath xpath[@key=val] """
 289     assert re.match(r'^[a-zA-Z_-]+$', key)
 290     expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
 291     return node.find(expr)
 292
 293 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 294 # the namespace parameter
 295
 296
 297 def xpath_with_ns(path, ns_map):
 298     components = [c.split(':') for c in path.split('/')]
 299     replaced = []
 300     for c in components:
 301         if len(c) == 1:
 302             replaced.append(c[0])
 303         else:
 304             ns, tag = c
 305             replaced.append('{%s}%s' % (ns_map[ns], tag))
 306     return '/'.join(replaced)
 307
 308
 309 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 310     def _find_xpath(xpath):
 311         return node.find(xpath)
 312
 313     if isinstance(xpath, str):
 314         n = _find_xpath(xpath)
 315     else:
 316         for xp in xpath:
 317             n = _find_xpath(xp)
 318             if n is not None:
 319                 break
 320
 321     if n is None:
 322         if default is not NO_DEFAULT:
 323             return default
 324         elif fatal:
 325             name = xpath if name is None else name
 326             raise ExtractorError('Could not find XML element %s' % name)
 327         else:
 328             return None
 329     return n
 330
 331
 332 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 333     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 334     if n is None or n == default:
 335         return n
 336     if n.text is None:
 337         if default is not NO_DEFAULT:
 338             return default
 339         elif fatal:
 340             name = xpath if name is None else name
 341             raise ExtractorError('Could not find XML element\'s text %s' % name)
 342         else:
 343             return None
 344     return n.text
 345
 346
 347 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 348     n = find_xpath_attr(node, xpath, key)
 349     if n is None:
 350         if default is not NO_DEFAULT:
 351             return default
 352         elif fatal:
 353             name = f'{xpath}[@{key}]' if name is None else name
 354             raise ExtractorError('Could not find XML attribute %s' % name)
 355         else:
 356             return None
 357     return n.attrib[key]
 358
 359
 360 def get_element_by_id(id, html, **kwargs):
 361     """Return the content of the tag with the specified ID in the passed HTML document"""
 362     return get_element_by_attribute('id', id, html, **kwargs)
 363
 364
 365 def get_element_html_by_id(id, html, **kwargs):
 366     """Return the html of the tag with the specified ID in the passed HTML document"""
 367     return get_element_html_by_attribute('id', id, html, **kwargs)
 368
 369
 370 def get_element_by_class(class_name, html):
 371     """Return the content of the first tag with the specified class in the passed HTML document"""
 372     retval = get_elements_by_class(class_name, html)
 373     return retval[0] if retval else None
 374
 375
 376 def get_element_html_by_class(class_name, html):
 377     """Return the html of the first tag with the specified class in the passed HTML document"""
 378     retval = get_elements_html_by_class(class_name, html)
 379     return retval[0] if retval else None
 380
 381
 382 def get_element_by_attribute(attribute, value, html, **kwargs):
 383     retval = get_elements_by_attribute(attribute, value, html, **kwargs)
 384     return retval[0] if retval else None
 385
 386
 387 def get_element_html_by_attribute(attribute, value, html, **kargs):
 388     retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
 389     return retval[0] if retval else None
 390
 391
 392 def get_elements_by_class(class_name, html, **kargs):
 393     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 394     return get_elements_by_attribute(
 395         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 396         html, escape_value=False)
 397
 398
 399 def get_elements_html_by_class(class_name, html):
 400     """Return the html of all tags with the specified class in the passed HTML document as a list"""
 401     return get_elements_html_by_attribute(
 402         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 403         html, escape_value=False)
 404
 405
 406 def get_elements_by_attribute(*args, **kwargs):
 407     """Return the content of the tag with the specified attribute in the passed HTML document"""
 408     return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 409
 410
 411 def get_elements_html_by_attribute(*args, **kwargs):
 412     """Return the html of the tag with the specified attribute in the passed HTML document"""
 413     return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 414
 415
 416 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
 417     """
 418     Return the text (content) and the html (whole) of the tag with the specified
 419     attribute in the passed HTML document
 420     """
 421     if not value:
 422         return
 423
 424     quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
 425
 426     value = re.escape(value) if escape_value else value
 427
 428     partial_element_re = rf'''(?x)
 429         <(?P<tag>{tag})
 430          (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
 431          \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
 432         '''
 433
 434     for m in re.finditer(partial_element_re, html):
 435         content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
 436
 437         yield (
 438             unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
 439             whole
 440         )
 441
 442
 443 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
 444     """
 445     HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
 446     closing tag for the first opening tag it has encountered, and can be used
 447     as a context manager
 448     """
 449
 450     class HTMLBreakOnClosingTagException(Exception):
 451         pass
 452
 453     def __init__(self):
 454         self.tagstack = collections.deque()
 455         html.parser.HTMLParser.__init__(self)
 456
 457     def __enter__(self):
 458         return self
 459
 460     def __exit__(self, *_):
 461         self.close()
 462
 463     def close(self):
 464         # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
 465         # so data remains buffered; we no longer have any interest in it, thus
 466         # override this method to discard it
 467         pass
 468
 469     def handle_starttag(self, tag, _):
 470         self.tagstack.append(tag)
 471
 472     def handle_endtag(self, tag):
 473         if not self.tagstack:
 474             raise compat_HTMLParseError('no tags in the stack')
 475         while self.tagstack:
 476             inner_tag = self.tagstack.pop()
 477             if inner_tag == tag:
 478                 break
 479         else:
 480             raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
 481         if not self.tagstack:
 482             raise self.HTMLBreakOnClosingTagException()
 483
 484
 485 # XXX: This should be far less strict
 486 def get_element_text_and_html_by_tag(tag, html):
 487     """
 488     For the first element with the specified tag in the passed HTML document
 489     return its' content (text) and the whole element (html)
 490     """
 491     def find_or_raise(haystack, needle, exc):
 492         try:
 493             return haystack.index(needle)
 494         except ValueError:
 495             raise exc
 496     closing_tag = f'</{tag}>'
 497     whole_start = find_or_raise(
 498         html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
 499     content_start = find_or_raise(
 500         html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
 501     content_start += whole_start + 1
 502     with HTMLBreakOnClosingTagParser() as parser:
 503         parser.feed(html[whole_start:content_start])
 504         if not parser.tagstack or parser.tagstack[0] != tag:
 505             raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
 506         offset = content_start
 507         while offset < len(html):
 508             next_closing_tag_start = find_or_raise(
 509                 html[offset:], closing_tag,
 510                 compat_HTMLParseError(f'closing {tag} tag not found'))
 511             next_closing_tag_end = next_closing_tag_start + len(closing_tag)
 512             try:
 513                 parser.feed(html[offset:offset + next_closing_tag_end])
 514                 offset += next_closing_tag_end
 515             except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
 516                 return html[content_start:offset + next_closing_tag_start], \
 517                     html[whole_start:offset + next_closing_tag_end]
 518         raise compat_HTMLParseError('unexpected end of html')
 519
 520
 521 class HTMLAttributeParser(html.parser.HTMLParser):
 522     """Trivial HTML parser to gather the attributes for a single element"""
 523
 524     def __init__(self):
 525         self.attrs = {}
 526         html.parser.HTMLParser.__init__(self)
 527
 528     def handle_starttag(self, tag, attrs):
 529         self.attrs = dict(attrs)
 530         raise compat_HTMLParseError('done')
 531
 532
 533 class HTMLListAttrsParser(html.parser.HTMLParser):
 534     """HTML parser to gather the attributes for the elements of a list"""
 535
 536     def __init__(self):
 537         html.parser.HTMLParser.__init__(self)
 538         self.items = []
 539         self._level = 0
 540
 541     def handle_starttag(self, tag, attrs):
 542         if tag == 'li' and self._level == 0:
 543             self.items.append(dict(attrs))
 544         self._level += 1
 545
 546     def handle_endtag(self, tag):
 547         self._level -= 1
 548
 549
 550 def extract_attributes(html_element):
 551     """Given a string for an HTML element such as
 552     <el
 553          a="foo" B="bar" c="&98;az" d=boz
 554          empty= noval entity="&amp;"
 555          sq='"' dq="'"
 556     >
 557     Decode and return a dictionary of attributes.
 558     {
 559         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 560         'empty': '', 'noval': None, 'entity': '&',
 561         'sq': '"', 'dq': '\''
 562     }.
 563     """
 564     parser = HTMLAttributeParser()
 565     with contextlib.suppress(compat_HTMLParseError):
 566         parser.feed(html_element)
 567         parser.close()
 568     return parser.attrs
 569
 570
 571 def parse_list(webpage):
 572     """Given a string for an series of HTML <li> elements,
 573     return a dictionary of their attributes"""
 574     parser = HTMLListAttrsParser()
 575     parser.feed(webpage)
 576     parser.close()
 577     return parser.items
 578
 579
 580 def clean_html(html):
 581     """Clean an HTML snippet into a readable string"""
 582
 583     if html is None:  # Convenience for sanitizing descriptions etc.
 584         return html
 585
 586     html = re.sub(r'\s+', ' ', html)
 587     html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
 588     html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
 589     # Strip html tags
 590     html = re.sub('<.*?>', '', html)
 591     # Replace html entities
 592     html = unescapeHTML(html)
 593     return html.strip()
 594
 595
 596 class LenientJSONDecoder(json.JSONDecoder):
 597     def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
 598         self.transform_source, self.ignore_extra = transform_source, ignore_extra
 599         super().__init__(*args, **kwargs)
 600
 601     def decode(self, s):
 602         if self.transform_source:
 603             s = self.transform_source(s)
 604         try:
 605             if self.ignore_extra:
 606                 return self.raw_decode(s.lstrip())[0]
 607             return super().decode(s)
 608         except json.JSONDecodeError as e:
 609             if e.pos is not None:
 610                 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
 611             raise
 612
 613
 614 def sanitize_open(filename, open_mode):
 615     """Try to open the given filename, and slightly tweak it if this fails.
 616
 617     Attempts to open the given filename. If this fails, it tries to change
 618     the filename slightly, step by step, until it's either able to open it
 619     or it fails and raises a final exception, like the standard open()
 620     function.
 621
 622     It returns the tuple (stream, definitive_file_name).
 623     """
 624     if filename == '-':
 625         if sys.platform == 'win32':
 626             import msvcrt
 627
 628             # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
 629             with contextlib.suppress(io.UnsupportedOperation):
 630                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 631         return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 632
 633     for attempt in range(2):
 634         try:
 635             try:
 636                 if sys.platform == 'win32':
 637                     # FIXME: An exclusive lock also locks the file from being read.
 638                     # Since windows locks are mandatory, don't lock the file on windows (for now).
 639                     # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
 640                     raise LockingUnsupportedError()
 641                 stream = locked_file(filename, open_mode, block=False).__enter__()
 642             except OSError:
 643                 stream = open(filename, open_mode)
 644             return stream, filename
 645         except OSError as err:
 646             if attempt or err.errno in (errno.EACCES,):
 647                 raise
 648             old_filename, filename = filename, sanitize_path(filename)
 649             if old_filename == filename:
 650                 raise
 651
 652
 653 def timeconvert(timestr):
 654     """Convert RFC 2822 defined time string into system timestamp"""
 655     timestamp = None
 656     timetuple = email.utils.parsedate_tz(timestr)
 657     if timetuple is not None:
 658         timestamp = email.utils.mktime_tz(timetuple)
 659     return timestamp
 660
 661
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    @returns            The sanitized string; '' only for empty input, otherwise
                        never empty ('_' is used when nothing is left)
    """
    if s == '':
        return ''

    def replace_insane(char):
        # Maps one input character to its replacement. Replacements introduced
        # here are prefixed with a NUL ('\0') marker so that the clean-up below
        # can tell them apart from characters of the input
        # NOTE: the order of the branches matters; the first match wins
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            # Control characters and '?' are dropped
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    # Drop the NUL markers, keeping the substitute characters themselves
    result = result.replace('\0', '') or '_'

    # NO_DEFAULT is a plain object() and therefore truthy: this clean-up only
    # runs when is_id was explicitly passed as a falsy value
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
 715
 716
 717 def sanitize_path(s, force=False):
 718     """Sanitizes and normalizes path on Windows"""
 719     if sys.platform == 'win32':
 720         force = False
 721         drive_or_unc, _ = os.path.splitdrive(s)
 722     elif force:
 723         drive_or_unc = ''
 724     else:
 725         return s
 726
 727     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 728     if drive_or_unc:
 729         norm_path.pop(0)
 730     sanitized_path = [
 731         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 732         for path_part in norm_path]
 733     if drive_or_unc:
 734         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 735     elif force and s and s[0] == os.path.sep:
 736         sanitized_path.insert(0, os.path.sep)
 737     return os.path.join(*sanitized_path)
 738
 739
 740 def sanitize_url(url, *, scheme='http'):
 741     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 742     # the number of unwanted failures due to missing protocol
 743     if url is None:
 744         return
 745     elif url.startswith('//'):
 746         return f'{scheme}:{url}'
 747     # Fix some common typos seen so far
 748     COMMON_TYPOS = (
 749         # https://github.com/ytdl-org/youtube-dl/issues/15649
 750         (r'^httpss://', r'https://'),
 751         # https://bx1.be/lives/direct-tv/
 752         (r'^rmtp([es]?)://', r'rtmp\1://'),
 753     )
 754     for mistake, fixup in COMMON_TYPOS:
 755         if re.match(mistake, url):
 756             return re.sub(mistake, fixup, url)
 757     return url
 758
 759
 760 def extract_basic_auth(url):
 761     parts = urllib.parse.urlsplit(url)
 762     if parts.username is None:
 763         return url, None
 764     url = urllib.parse.urlunsplit(parts._replace(netloc=(
 765         parts.hostname if parts.port is None
 766         else '%s:%d' % (parts.hostname, parts.port))))
 767     auth_payload = base64.b64encode(
 768         ('%s:%s' % (parts.username, parts.password or '')).encode())
 769     return url, f'Basic {auth_payload.decode()}'
 770
 771
 772 def sanitized_Request(url, *args, **kwargs):
 773     url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
 774     if auth_header is not None:
 775         headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
 776         headers['Authorization'] = auth_header
 777     return urllib.request.Request(url, *args, **kwargs)
 778
 779
 780 def expand_path(s):
 781     """Expand shell variables and ~"""
 782     return os.path.expandvars(compat_expanduser(s))
 783
 784
 785 def orderedSet(iterable, *, lazy=False):
 786     """Remove all duplicates from the input iterable"""
 787     def _iter():
 788         seen = []  # Do not use set since the items can be unhashable
 789         for x in iterable:
 790             if x not in seen:
 791                 seen.append(x)
 792                 yield x
 793
 794     return _iter() if lazy else list(_iter())
 795
 796
 797 def _htmlentity_transform(entity_with_semicolon):
 798     """Transforms an HTML entity to a character."""
 799     entity = entity_with_semicolon[:-1]
 800
 801     # Known non-numeric HTML entity
 802     if entity in html.entities.name2codepoint:
 803         return chr(html.entities.name2codepoint[entity])
 804
 805     # TODO: HTML5 allows entities without a semicolon.
 806     # E.g. '&Eacuteric' should be decoded as 'Éric'.
 807     if entity_with_semicolon in html.entities.html5:
 808         return html.entities.html5[entity_with_semicolon]
 809
 810     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 811     if mobj is not None:
 812         numstr = mobj.group(1)
 813         if numstr.startswith('x'):
 814             base = 16
 815             numstr = '0%s' % numstr
 816         else:
 817             base = 10
 818         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 819         with contextlib.suppress(ValueError):
 820             return chr(int(numstr, base))
 821
 822     # Unknown entity in name, return its literal representation
 823     return '&%s;' % entity
 824
 825
 826 def unescapeHTML(s):
 827     if s is None:
 828         return None
 829     assert isinstance(s, str)
 830
 831     return re.sub(
 832         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 833
 834
 835 def escapeHTML(text):
 836     return (
 837         text
 838         .replace('&', '&amp;')
 839         .replace('<', '&lt;')
 840         .replace('>', '&gt;')
 841         .replace('"', '&quot;')
 842         .replace("'", '&#39;')
 843     )
 844
 845
 846 def process_communicate_or_kill(p, *args, **kwargs):
 847     deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
 848                         f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
 849     return Popen.communicate_or_kill(p, *args, **kwargs)
 850
 851
 852 class Popen(subprocess.Popen):
 853     if sys.platform == 'win32':
 854         _startupinfo = subprocess.STARTUPINFO()
 855         _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 856     else:
 857         _startupinfo = None
 858
 859     @staticmethod
 860     def _fix_pyinstaller_ld_path(env):
 861         """Restore LD_LIBRARY_PATH when using PyInstaller
 862             Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
 863                  https://github.com/yt-dlp/yt-dlp/issues/4573
 864         """
 865         if not hasattr(sys, '_MEIPASS'):
 866             return
 867
 868         def _fix(key):
 869             orig = env.get(f'{key}_ORIG')
 870             if orig is None:
 871                 env.pop(key, None)
 872             else:
 873                 env[key] = orig
 874
 875         _fix('LD_LIBRARY_PATH')  # Linux
 876         _fix('DYLD_LIBRARY_PATH')  # macOS
 877
 878     def __init__(self, *args, env=None, text=False, **kwargs):
 879         if env is None:
 880             env = os.environ.copy()
 881         self._fix_pyinstaller_ld_path(env)
 882
 883         if text is True:
 884             kwargs['universal_newlines'] = True  # For 3.6 compatibility
 885             kwargs.setdefault('encoding', 'utf-8')
 886             kwargs.setdefault('errors', 'replace')
 887         super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
 888
 889     def communicate_or_kill(self, *args, **kwargs):
 890         try:
 891             return self.communicate(*args, **kwargs)
 892         except BaseException:  # Including KeyboardInterrupt
 893             self.kill(timeout=None)
 894             raise
 895
 896     def kill(self, *, timeout=0):
 897         super().kill()
 898         if timeout != 0:
 899             self.wait(timeout=timeout)
 900
 901     @classmethod
 902     def run(cls, *args, timeout=None, **kwargs):
 903         with cls(*args, **kwargs) as proc:
 904             default = '' if proc.text_mode else b''
 905             stdout, stderr = proc.communicate_or_kill(timeout=timeout)
 906             return stdout or default, stderr or default, proc.returncode
 907
 908
 909 def get_subprocess_encoding():
 910     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 911         # For subprocess calls, encode with locale encoding
 912         # Refer to http://stackoverflow.com/a/9951851/35070
 913         encoding = preferredencoding()
 914     else:
 915         encoding = sys.getfilesystemencoding()
 916     if encoding is None:
 917         encoding = 'utf-8'
 918     return encoding
 919
 920
 921 def encodeFilename(s, for_subprocess=False):
 922     assert isinstance(s, str)
 923     return s
 924
 925
def decodeFilename(b, for_subprocess=False):
    """Return `b` unchanged; `for_subprocess` is accepted but not used."""
    return b
 928
 929
 930 def encodeArgument(s):
 931     # Legacy code that uses byte strings
 932     # Uncomment the following line after fixing all post processors
 933     # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
 934     return s if isinstance(s, str) else s.decode('ascii')
 935
 936
def decodeArgument(b):
    """Return `b` unchanged."""
    return b
 939
 940
 941 def decodeOption(optval):
 942     if optval is None:
 943         return optval
 944     if isinstance(optval, bytes):
 945         optval = optval.decode(preferredencoding())
 946
 947     assert isinstance(optval, str)
 948     return optval
 949
 950
 951 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 952
 953
 954 def timetuple_from_msec(msec):
 955     secs, msec = divmod(msec, 1000)
 956     mins, secs = divmod(secs, 60)
 957     hrs, mins = divmod(mins, 60)
 958     return _timetuple(hrs, mins, secs, msec)
 959
 960
 961 def formatSeconds(secs, delim=':', msec=False):
 962     time = timetuple_from_msec(secs * 1000)
 963     if time.hours:
 964         ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 965     elif time.minutes:
 966         ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
 967     else:
 968         ret = '%d' % time.seconds
 969     return '%s.%03d' % (ret, time.milliseconds) if msec else ret
 970
 971
 972 def _ssl_load_windows_store_certs(ssl_context, storename):
 973     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
 974     try:
 975         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
 976                  if encoding == 'x509_asn' and (
 977                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
 978     except PermissionError:
 979         return
 980     for cert in certs:
 981         with contextlib.suppress(ssl.SSLError):
 982             ssl_context.load_verify_locations(cadata=cert)
 983
 984
def make_HTTPS_handler(params, **kwargs):
    """Create a `YoutubeDLHTTPSHandler` whose SSL context is configured from `params`.

    Keys read from `params`:
        nocheckcertificate           Disable hostname checking and certificate verification
        legacyserverconnect          Set SSL_OP_LEGACY_SERVER_CONNECT and use the 'DEFAULT' ciphers
        compat_opts                  'no-certifi' prevents the use of certifi's CA bundle
        client_certificate           Path of a client certificate to load
        client_certificate_key       Key file for the client certificate
        client_certificate_password  Password for the client certificate

    @param kwargs   Passed on to YoutubeDLHTTPSHandler
    @raises YoutubeDLError  if the client certificate cannot be loaded
    """
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    # NOTE: check_hostname is set before verify_mode on purpose; per the ssl documentation,
    # verify_mode cannot be set to CERT_NONE while check_hostname is enabled
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')
    elif (
        sys.version_info < (3, 10)
        and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
        and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
    ):
        # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
        # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
        # in some situations [2][3].
        # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
        # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
        # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
        # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
        # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
        # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
        # 4. https://peps.python.org/pep-0644/
        # 5. https://peps.python.org/pep-0644/#libressl-support
        # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
        context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
        context.minimum_version = ssl.TLSVersion.TLSv1_2

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        # Prefer certifi's CA bundle when it is available and not disabled by the user
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
1046
1047
def bug_reports_message(before=';'):
    """Return the request to report a bug, ready to be appended to an error message.

    @param before   Text that precedes the request (trailing whitespace is ignored).
                    The request is capitalized when this is empty or ends a sentence
    """
    from .update import REPOSITORY

    msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')

    before = before.rstrip()
    starts_new_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_new_sentence:
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
1059
1060
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The message is the `msg` given to the constructor, else the `msg`
    defined by the class, else the name of the class.
    """
    # Default message; subclasses may override it
    msg = None

    def __init__(self, msg=None):
        if msg is None:
            if self.msg is None:
                self.msg = type(self).__name__
        else:
            self.msg = msg
        super().__init__(self.msg)
1071
1072
# Exception types that indicate a network problem.
# ssl.CertificateError is only included when the ssl module provides it
network_exceptions = (
    urllib.error.URLError,
    http.client.HTTPException,
    socket.error,
    *((ssl.CertificateError,) if hasattr(ssl, 'CertificateError') else ()),
)
1077
1078
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.

        The final message is built as "[ie] video_id: msg (caused by cause)",
        followed by the bug report request unless the error is expected.
        """
        active = sys.exc_info()
        # An error raised while handling a network exception is always considered expected
        if active[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        # preserve original exception; for a nested ExtractorError, the one it preserved
        self.exc_info = active[1].exc_info if isinstance(active[1], ExtractorError) else active

        prefix = format_field(ie, None, '[%s] ') + format_field(video_id, None, '%s: ')
        suffix = format_field(cause, None, ' (caused by %r)') + ('' if expected else bug_reports_message())
        super().__init__(''.join((prefix, msg, suffix)))

    def format_traceback(self):
        """Return the formatted traceback and cause joined by a newline, or None if there is neither"""
        sections = (
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
        )
        return join_nonempty(*sections, delim='\n') or None
1111
1112
class UnsupportedError(ExtractorError):
    """Expected error for a URL that is not supported; the URL is kept in `url`"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super().__init__(message, expected=True)
        self.url = url
1118
1119
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match

    Behaves exactly like ExtractorError; the separate type only allows telling it apart.
    """
1123
1124
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always an expected error. `countries` is stored as given.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # "expected" is forced, whatever the caller passed
        super().__init__(msg, **{**kwargs, 'expected': True})
        self.countries = countries
1136
1137
class UserNotLive(ExtractorError):
    """Error when a channel/user is not live

    Always an expected error; a generic message is used when none is given.
    """

    def __init__(self, msg=None, **kwargs):
        if not msg:
            msg = 'The channel is not currently live'
        kwargs.update(expected=True)
        super().__init__(msg, **kwargs)
1144
1145
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super().__init__(msg)
1158
1159
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Message used when the exception is created without an explicit one
    msg = 'Entry not found in info'
1167
1168
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        # The filename, when known, is appended to the generic message
        message = self.msg if filename is None else f'{self.msg}: {filename}'
        super().__init__(message)
1181
1182
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.

    It adds nothing to YoutubeDLError: without an explicit message,
    the class name is used as the message.
    """
1189
1190
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Message used when the exception is created without an explicit one
    msg = 'The download was cancelled'
1194
1195
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Message used when the exception is created without an explicit one
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1199
1200
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    # Message used when the exception is created without an explicit one
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1204
1205
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Message used when the exception is created without an explicit one
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1209
1210
class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted.

    `expected` is stored on the exception as given.
    """

    def __init__(self, msg, expected=False):
        self.expected = expected
        super().__init__(msg)
1217
1218
class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        # Takes no arguments: always uses the class message and is never "expected"
        super().__init__(self.msg, expected=False)
1225
1226
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        # The underlying error, when given, is appended to the generic message
        message = self.msg if err is None else f'{self.msg}: {err}'
        super().__init__(message)
1239
1240
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1254
1255
class XAttrMetadataError(YoutubeDLError):
    """Error with an errno `code` and a message, classified into a `reason`

    `reason` is one of 'NO_SPACE', 'VALUE_TOO_LONG' and 'NOT_SUPPORTED',
    determined from the code and from well-known fragments of the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code, self.msg = code, msg

        # Parsing code and msg
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
1270
1271
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when extended attributes cannot be used at all;
    # the raising sites are not part of this section - confirm
    pass
1274
1275
1276 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1277     hc = http_class(*args, **kwargs)
1278     source_address = ydl_handler._params.get('source_address')
1279
1280     if source_address is not None:
1281         # This is to workaround _create_connection() from socket where it will try all
1282         # address data from getaddrinfo() including IPv6. This filters the result from
1283         # getaddrinfo() based on the source_address value.
1284         # This is based on the cpython socket.create_connection() function.
1285         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1286         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1287             host, port = address
1288             err = None
1289             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1290             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1291             ip_addrs = [addr for addr in addrs if addr[0] == af]
1292             if addrs and not ip_addrs:
1293                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1294                 raise OSError(
1295                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
1296                     % (ip_version, source_address[0]))
1297             for res in ip_addrs:
1298                 af, socktype, proto, canonname, sa = res
1299                 sock = None
1300                 try:
1301                     sock = socket.socket(af, socktype, proto)
1302                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1303                         sock.settimeout(timeout)
1304                     sock.bind(source_address)
1305                     sock.connect(sa)
1306                     err = None  # Explicitly break reference cycle
1307                     return sock
1308                 except OSError as _:
1309                     err = _
1310                     if sock is not None:
1311                         sock.close()
1312             if err is not None:
1313                 raise err
1314             else:
1315                 raise OSError('getaddrinfo returns an empty list')
1316         if hasattr(hc, '_create_connection'):
1317             hc._create_connection = _create_connection
1318         hc.source_address = (source_address, 0)
1319
1320     return hc
1321
1322
def handle_youtubedl_headers(headers):
    """Apply the internal "Youtubedl-no-compression" header.

    If it is present, a new dict is returned that lacks both this header and any
    "Accept-Encoding" header (in any letter case). Otherwise `headers` itself is returned.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers
    return {
        name: value for name, value in headers.items()
        if name.lower() != 'accept-encoding' and name != marker}
1331
1332
class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class adds the standard
    headers to every HTTP request and transparently decodes gzipped, deflated
    and brotli-compressed responses. To avoid compression for a particular
    request, include the HTTP header "Youtubedl-no-compression" in the
    original request; it is removed before the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open the request, going through a SOCKS proxy if "Ytdl-socks-proxy" is set on it"""
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if not socks_proxy:
            conn_class = http.client.HTTPConnection
        else:
            conn_class = make_socks_conn_class(http.client.HTTPConnection, socks_proxy)
            # Internal header; it must not be sent to the server
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(_create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, whether it is a raw stream or wrapped in a zlib container"""
        if not data:
            return data
        try:
            # Raw deflate stream (no header)
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        """Decompress brotli data; empty data is returned as-is"""
        return brotli.decompress(data) if data else data

    def http_request(self, req):
        """Prepare the request: escape its URL and add the default headers"""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Substitute URL if any change after escaping
        if escaped_url != original_url:
            req = update_Request(req, url=escaped_url)

        default_headers = self._params.get('http_headers', std_headers)
        for name, value in default_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        """Decode a compressed response body and percent-encode the redirect location, if any"""
        old_resp = resp

        def gunzip(content):
            try:
                return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for trim in range(1, 1024):
                    try:
                        return gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb').read()
                    except OSError:
                        continue
                raise original_ioerror

        def rewrap(payload):
            # The new response shares the headers object of the original one
            decoded = urllib.request.addinfourl(
                io.BytesIO(payload), old_resp.headers, old_resp.url, old_resp.code)
            decoded.msg = old_resp.msg
            del decoded.headers['Content-encoding']
            return decoded

        decoders = (
            ('gzip', gunzip),
            ('deflate', self.deflate),
            ('br', self.brotli),
        )
        for encoding, decode in decoders:
            if resp.headers.get('Content-encoding', '') == encoding:
                resp = rewrap(decode(resp.read()))

        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if not 300 <= resp.code < 400:
            return resp
        location = resp.headers.get('Location')
        if not location:
            return resp
        # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
        location = location.encode('iso-8859-1').decode()
        location_escaped = escape_url(location)
        if location_escaped != location:
            del resp.headers['Location']
            resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
1461
1462
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of `base_class` that connects through the given SOCKS proxy.

    @param base_class   http.client.HTTPConnection, HTTPSConnection or a subclass of these
    @param socks_proxy  Proxy URL with the scheme socks, socks4, socks4a or socks5.
                        The port defaults to 1080; username and password are unquoted
    @returns            The connection class. DNS resolution is left to the proxy
    @raises ValueError  if the scheme of `socks_proxy` is not a supported SOCKS scheme
    """
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError(f'Unsupported SOCKS proxy scheme: {url_components.scheme!r}')

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, http.client.HTTPSConnection):
                # Always wrap with the connection's own SSL context. The former fallback to
                # ssl.wrap_socket() performed no certificate validation and that function
                # no longer exists in Python 3.12+
                self.sock = self._context.wrap_socket(
                    self.sock, server_hostname=self.host)

    return SocksConnection
1504
1505
class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    """HTTPS handler that supports SOCKS proxies and the `source_address` parameter

    @param params               Parameters consulted when connections are created
    @param https_conn_class     Connection class to use instead of http.client.HTTPSConnection
    All further arguments are passed on to urllib.request.HTTPSHandler
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        if https_conn_class:
            self._https_conn_class = https_conn_class
        else:
            self._https_conn_class = http.client.HTTPSConnection

    def https_open(self, req):
        """Open the request, going through a SOCKS proxy if "Ytdl-socks-proxy" is set on it"""
        # Only forward what this Python version's handler actually has
        optional = (
            ('context', '_context'),  # python > 2.6
            ('check_hostname', '_check_hostname'),  # python 3.x
        )
        kwargs = {name: getattr(self, attr) for name, attr in optional if hasattr(self, attr)}

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # Internal header; it must not be sent to the server
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(_create_http_connection, self, conn_class, True)
        try:
            return self.do_open(connection_factory, req, **kwargs)
        except urllib.error.URLError as e:
            reason = e.reason
            handshake_failed = (
                isinstance(reason, ssl.SSLError)
                and getattr(reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE')
            if not handshake_failed:
                raise
            raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
1534
1535
def is_path_like(f):
    """Whether `f` is a filesystem path (str, bytes or os.PathLike) rather than e.g. a file object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1538
1539
1540 class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1541     """
1542     See [1] for cookie file format.
1543
1544     1. https://curl.haxx.se/docs/http-cookies.html
1545     """
1546     _HTTPONLY_PREFIX = '#HttpOnly_'
1547     _ENTRY_LEN = 7
1548     _HEADER = '''# Netscape HTTP Cookie File
1549 # This file is generated by yt-dlp.  Do not edit.
1550
1551 '''
1552     _CookieFileEntry = collections.namedtuple(
1553         'CookieFileEntry',
1554         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1555
1556     def __init__(self, filename=None, *args, **kwargs):
1557         super().__init__(None, *args, **kwargs)
1558         if is_path_like(filename):
1559             filename = os.fspath(filename)
1560         self.filename = filename
1561
1562     @staticmethod
1563     def _true_or_false(cndn):
1564         return 'TRUE' if cndn else 'FALSE'
1565
1566     @contextlib.contextmanager
1567     def open(self, file, *, write=False):
1568         if is_path_like(file):
1569             with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1570                 yield f
1571         else:
1572             if write:
1573                 file.truncate(0)
1574             yield file
1575
1576     def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1577         now = time.time()
1578         for cookie in self:
1579             if (not ignore_discard and cookie.discard
1580                     or not ignore_expires and cookie.is_expired(now)):
1581                 continue
1582             name, value = cookie.name, cookie.value
1583             if value is None:
1584                 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1585                 # with no name, whereas http.cookiejar regards it as a
1586                 # cookie with no value.
1587                 name, value = '', name
1588             f.write('%s\n' % '\t'.join((
1589                 cookie.domain,
1590                 self._true_or_false(cookie.domain.startswith('.')),
1591                 cookie.path,
1592                 self._true_or_false(cookie.secure),
1593                 str_or_none(cookie.expires, default=''),
1594                 name, value
1595             )))
1596
1597     def save(self, filename=None, *args, **kwargs):
1598         """
1599         Save cookies to a file.
1600         Code is taken from CPython 3.6
1601         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1602
1603         if filename is None:
1604             if self.filename is not None:
1605                 filename = self.filename
1606             else:
1607                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1608
1609         # Store session cookies with `expires` set to 0 instead of an empty string
1610         for cookie in self:
1611             if cookie.expires is None:
1612                 cookie.expires = 0
1613
1614         with self.open(filename, write=True) as f:
1615             f.write(self._HEADER)
1616             self._really_save(f, *args, **kwargs)
1617
1618     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1619         """Load cookies from a file."""
1620         if filename is None:
1621             if self.filename is not None:
1622                 filename = self.filename
1623             else:
1624                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1625
1626         def prepare_line(line):
1627             if line.startswith(self._HTTPONLY_PREFIX):
1628                 line = line[len(self._HTTPONLY_PREFIX):]
1629             # comments and empty lines are fine
1630             if line.startswith('#') or not line.strip():
1631                 return line
1632             cookie_list = line.split('\t')
1633             if len(cookie_list) != self._ENTRY_LEN:
1634                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1635             cookie = self._CookieFileEntry(*cookie_list)
1636             if cookie.expires_at and not cookie.expires_at.isdigit():
1637                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1638             return line
1639
1640         cf = io.StringIO()
1641         with self.open(filename) as f:
1642             for line in f:
1643                 try:
1644                     cf.write(prepare_line(line))
1645                 except http.cookiejar.LoadError as e:
1646                     if f'{line.strip()} '[0] in '[{"':
1647                         raise http.cookiejar.LoadError(
1648                             'Cookies file must be Netscape formatted, not JSON. See  '
1649                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1650                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1651                     continue
1652         cf.seek(0)
1653         self._really_load(cf, filename, ignore_discard, ignore_expires)
1654         # Session cookies are denoted by either `expires` field set to
1655         # an empty string or 0. MozillaCookieJar only recognizes the former
1656         # (see [1]). So we need force the latter to be recognized as session
1657         # cookies on our own.
1658         # Session cookies may be important for cookies-based authentication,
1659         # e.g. usually, when user does not check 'Remember me' check box while
1660         # logging in on a site, some important cookies are stored as session
1661         # cookies so that not recognizing them will result in failed login.
1662         # 1. https://bugs.python.org/issue17164
1663         for cookie in self:
1664             # Treat `expires=0` cookies as session cookies
1665             if cookie.expires == 0:
1666                 cookie.expires = None
1667                 cookie.discard = True
1668
1669
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    """Cookie processor whose request/response hooks are also bound for https"""

    def __init__(self, cookiejar=None):
        super().__init__(cookiejar)

    def http_response(self, request, response):
        """Delegate to the stock HTTPCookieProcessor response handling"""
        return super().http_response(request, response)

    # https traffic goes through the same hooks as http traffic
    https_request = urllib.request.HTTPCookieProcessor.http_request
    https_response = http_response
1679
1680
class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    Compared to the stock handler it additionally routes the HTTP status
    308 Permanent Redirect [2] (used by some sites [3]) through the regular
    redirect machinery, and it rewrites the request method on redirect the
    way browsers commonly do.

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        redirectable = (
            code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD")
            or code in (301, 302, 303) and method == "POST")
        if not redirectable:
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        target_url = newurl.replace(' ', '%20')

        # Headers describing the original body are not carried over
        body_headers = ("content-length", "content-type")
        forwarded = {}
        for header_name, header_value in req.headers.items():
            if header_name.lower() not in body_headers:
                forwarded[header_name] = header_value

        # A 303 must either use GET or HEAD for subsequent request
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
        # 301 and 302 redirects are commonly turned into a GET from a POST
        # for subsequent requests by browsers, so we'll do the same.
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
        if (code == 303 and method != 'HEAD') or (code in (301, 302) and method == 'POST'):
            method = 'GET'

        return urllib.request.Request(
            target_url, headers=forwarded, origin_req_host=req.origin_req_host,
            unverifiable=True, method=method)
1741
1742
def extract_timezone(date_str):
    """
    Split a trailing timezone off a date string.

    Returns a (timedelta, remaining_date_str) tuple. A numeric offset or a
    trailing "Z" is tried first; otherwise a trailing upper-case name after
    a time is looked up in TIMEZONE_NAMES. When nothing is recognized the
    offset is zero and the string is returned unchanged.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                   [ ]?                                          # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if m:
        remainder = date_str[:-len(m.group('tz'))]
        sign_char = m.group('sign')
        if not sign_char:
            # A bare "Z" carries no offset
            return datetime.timedelta(), remainder
        direction = 1 if sign_char == '+' else -1
        offset = datetime.timedelta(
            hours=direction * int(m.group('hours')),
            minutes=direction * int(m.group('minutes')))
        return offset, remainder

    # No numeric offset: fall back to a named timezone after a time
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    named_hours = TIMEZONE_NAMES.get(m and m.group('tz').strip())
    if named_hours is not None:
        date_str = date_str[:-len(m.group('tz'))]
    return datetime.timedelta(hours=named_hours or 0), date_str
1771
1772
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """
    Return a UNIX timestamp from the given date

    @param delimiter  Separator between the date and the time part
    @param timezone   Offset as a timedelta; extracted from date_str when None
    Returns None for a None input or when the string does not parse.
    """
    if date_str is None:
        return None

    # The strptime format below has no fractional seconds, so drop them
    cleaned = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, cleaned = extract_timezone(cleaned)

    try:
        parsed = datetime.datetime.strptime(cleaned, f'%Y-%m-%d{delimiter}%H:%M:%S')
        return calendar.timegm((parsed - timezone).timetuple())
    except ValueError:
        return None
1788
1789
def date_formats(day_first=True):
    """Return the day-first date formats, or the month-first ones if day_first is falsy"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1792
1793
def unified_strdate(date_str, day_first=True):
    """
    Return a string with the date in the format YYYYMMDD

    Returns None for a None input or when no format matches.
    """
    if date_str is None:
        return None

    # Commas become spaces; AM/PM (+ a following timezone word) is removed
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str.replace(',', ' '))
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if upload_date is None else str(upload_date)
1816
1817
def unified_timestamp(date_str, day_first=True):
    """
    Return a UNIX timestamp parsed from a free-form date string

    @param date_str   Date/time text; None yields None
    @param day_first  Passed to date_formats to select the format list
    Returns None when the string cannot be parsed.
    """
    if date_str is None:
        return None

    # Drop commas, pipes and weekday names (all seven days, abbreviated
    # or spelled out), then collapse runs of whitespace
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))

    # Must be determined before the AM/PM marker is stripped below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # The first format that parses wins
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())

    # Last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
1849
1850
def determine_ext(url, default_ext='unknown_video'):
    """
    Guess the file extension from a URL

    Returns default_ext when url is None, contains no dot, or the text
    after the last dot (query string excluded) is not a plausible extension.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    return trimmed if trimmed in KNOWN_EXTENSIONS else default_ext
1862
1863
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>" """
    subtitle_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, subtitle_ext, expected_real_ext)
1866
1867
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    The result is a naive datetime; "now"/"today"/"yesterday" are based on
    the current UTC time.

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    # datetime.utcnow() is deprecated; this yields the same naive UTC value
    utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    today = datetime_round(utc_now, precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is not None:
        # The part before the offset is resolved recursively
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months and years are not fixed-length, so they cannot be a timedelta
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1908
1909
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    Raises ValueError when strict is set and date_str is not of that form.
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if re.fullmatch(allowed, date_str) is None:
            raise ValueError(f'Invalid date format "{date_str}"')
    moment = datetime_from_str(date_str, precision='microsecond', format=format)
    return moment.date()
1920
1921
def datetime_add_months(dt, months):
    """
    Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month so that divmod yields the year carry
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
1929
1930
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    @param precision  microsecond|second|minute|hour|day
                      "microsecond" returns dt unchanged
    Returns a naive datetime. Raises KeyError for an unknown precision.
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    roundto = lambda x, n: ((x + n / 2) // n) * n
    # timetuple() carries no microseconds, so they are discarded here
    timestamp = calendar.timegm(dt.timetuple())
    rounded = roundto(timestamp, unit_seconds[precision])
    # datetime.utcfromtimestamp() is deprecated; this yields the same naive UTC value
    return datetime.datetime.fromtimestamp(rounded, datetime.timezone.utc).replace(tzinfo=None)
1947
1948
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Anything that is not in 'YYYYMMDD' format is returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return date_str if parts is None else '-'.join(parts.groups())
1957
1958
class DateRange:
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """
        start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest/latest representable date.
        Raises ValueError when start lies after end.
        """
        self.start = datetime.date.min if start is None else date_from_str(start, strict=True)
        self.end = datetime.date.max if end is None else date_from_str(end, strict=True)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join(bound.isoformat() for bound in (self.start, self.end))

    def __eq__(self, other):
        if not isinstance(other, DateRange):
            return False
        return (self.start, self.end) == (other.start, other.end)
1992
1993
def platform_name():
    """ Returns the platform name as a str (deprecated wrapper around platform.platform) """
    notice = f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead'
    deprecation_warning(notice)
    return platform.platform()
1998
1999
@functools.cache
def system_identifier():
    """Return a one-line description of the Python runtime, platform, OpenSSL and libc"""
    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]

    try:
        libc_ver = platform.libc_ver()
    except OSError:  # We may not have access to the executable
        libc_ver = []

    fields = (
        platform.python_version(),
        implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
    return 'Python %s (%s %s %s) - %s (%s%s)' % fields
2018
2019
@functools.cache
def get_windows_version():
    ''' Get Windows version. returns () if it's not running on Windows '''
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
2027
2028
def write_string(s, out=None, encoding=None):
    """
    Write the str s to a stream and flush it

    @param out       Target stream; sys.stderr when falsy
    @param encoding  Encoding to use when bytes have to be written;
                     otherwise the stream's own or the preferred encoding
    """
    assert isinstance(s, str)
    stream = out or sys.stderr

    if compat_os_name == 'nt' and supports_terminal_sequences(stream):
        s = re.sub(r'([\r\n]+)', r' \1', s)

    # Decide whether bytes must be written and where they go
    sink, codec = stream, None
    if 'b' in getattr(stream, 'mode', ''):
        # Stream opened in binary mode: encode and write to it directly
        codec = encoding or preferredencoding()
    elif hasattr(stream, 'buffer'):
        # Text stream exposing a binary buffer: encode and write to that
        sink = stream.buffer
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()

    payload = s.encode(codec, 'ignore') if codec else s
    sink.write(payload)
    stream.flush()
2045
2046
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """
    Report the use of a deprecated feature

    When running as CLI, each distinct message is reported only once, via
    printer if given and via write_string otherwise. When used as a library,
    a DeprecationWarning is issued through the warnings module instead.
    """
    from . import _IN_CLI
    if not _IN_CLI:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
        return

    already_reported = deprecation_warning._cache
    if msg in already_reported:
        return
    already_reported.add(msg)

    text = f'{msg}{bug_reports_message()}'
    if printer:
        return printer(text, **kwargs)
    return write_string(f'ERROR: {text}\n', **kwargs)


# Messages that have already been reported in CLI mode
deprecation_warning._cache = set()
2062
2063
def bytes_to_intlist(bs):
    """Return the items of bs as a list of ints; characters are converted with ord()"""
    if not bs:
        return []
    # Indexing bytes yields ints already; anything else is treated as characters
    if isinstance(bs[0], int):
        return list(bs)
    return list(map(ord, bs))
2071
2072
def intlist_to_bytes(xs):
    """Pack a sequence of ints (each 0-255) into a bytes object"""
    if xs:
        return struct.pack(f'{len(xs)}B', *xs)
    return b''
2077
2078
class LockingUnsupportedError(OSError):
    """Raised by the fallback lock functions when no file locking is available"""
    msg = 'File locking is not supported'

    def __init__(self):
        OSError.__init__(self, self.msg)
2084
2085
2086 # Cross-platform file locking
2087 if sys.platform == 'win32':
2088     import ctypes
2089     import ctypes.wintypes
2090     import msvcrt
2091
2092     class OVERLAPPED(ctypes.Structure):
2093         _fields_ = [
2094             ('Internal', ctypes.wintypes.LPVOID),
2095             ('InternalHigh', ctypes.wintypes.LPVOID),
2096             ('Offset', ctypes.wintypes.DWORD),
2097             ('OffsetHigh', ctypes.wintypes.DWORD),
2098             ('hEvent', ctypes.wintypes.HANDLE),
2099         ]
2100
2101     kernel32 = ctypes.windll.kernel32
2102     LockFileEx = kernel32.LockFileEx
2103     LockFileEx.argtypes = [
2104         ctypes.wintypes.HANDLE,     # hFile
2105         ctypes.wintypes.DWORD,      # dwFlags
2106         ctypes.wintypes.DWORD,      # dwReserved
2107         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
2108         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
2109         ctypes.POINTER(OVERLAPPED)  # Overlapped
2110     ]
2111     LockFileEx.restype = ctypes.wintypes.BOOL
2112     UnlockFileEx = kernel32.UnlockFileEx
2113     UnlockFileEx.argtypes = [
2114         ctypes.wintypes.HANDLE,     # hFile
2115         ctypes.wintypes.DWORD,      # dwReserved
2116         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
2117         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
2118         ctypes.POINTER(OVERLAPPED)  # Overlapped
2119     ]
2120     UnlockFileEx.restype = ctypes.wintypes.BOOL
2121     whole_low = 0xffffffff
2122     whole_high = 0x7fffffff
2123
2124     def _lock_file(f, exclusive, block):
2125         overlapped = OVERLAPPED()
2126         overlapped.Offset = 0
2127         overlapped.OffsetHigh = 0
2128         overlapped.hEvent = 0
2129         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2130
2131         if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2132                           (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2133                           0, whole_low, whole_high, f._lock_file_overlapped_p):
2134             # NB: No argument form of "ctypes.FormatError" does not work on PyPy
2135             raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2136
2137     def _unlock_file(f):
2138         assert f._lock_file_overlapped_p
2139         handle = msvcrt.get_osfhandle(f.fileno())
2140         if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2141             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2142
2143 else:
2144     try:
2145         import fcntl
2146
2147         def _lock_file(f, exclusive, block):
2148             flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2149             if not block:
2150                 flags |= fcntl.LOCK_NB
2151             try:
2152                 fcntl.flock(f, flags)
2153             except BlockingIOError:
2154                 raise
2155             except OSError:  # AOSP does not have flock()
2156                 fcntl.lockf(f, flags)
2157
2158         def _unlock_file(f):
2159             try:
2160                 fcntl.flock(f, fcntl.LOCK_UN)
2161             except OSError:
2162                 fcntl.lockf(f, fcntl.LOCK_UN)
2163
2164     except ImportError:
2165
2166         def _lock_file(f, exclusive, block):
2167             raise LockingUnsupportedError()
2168
2169         def _unlock_file(f):
2170             raise LockingUnsupportedError()
2171
2172
class locked_file:
    """
    File wrapper that holds a lock on the file while it is in use

    The file is opened by the constructor and locked by __enter__ (alias
    open): exclusively unless the mode contains "r". __exit__ (alias close)
    unlocks and closes it. Unknown attributes and iteration are delegated
    to the underlying file object.
    """
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        """
        @param mode      One of r, rb, a, ab, w, wb; NotImplementedError otherwise
        @param block     Whether acquiring the lock may block
        @param encoding  Passed on to os.fdopen
        """
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)

    def __enter__(self):
        """Acquire the lock (and truncate in "w" modes); the file is closed on failure"""
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            # Truncation is deferred until the lock is held
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    # __exit__ is not invoked when __enter__ raises, so the
                    # lock and the file have to be released here
                    self.__exit__()
                    raise
        return self

    def unlock(self):
        """Release the lock if it is held; the file itself stays open"""
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        """Unlock and close the file; the file is closed even if unlocking fails"""
        try:
            self.unlock()
        finally:
            self.f.close()

    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        # Only reached for attributes not found on the wrapper itself
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
2236
2237
@functools.cache
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' should that be None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
2242
2243
def shell_quote(args):
    """Return the arguments quoted with compat_shlex_quote and joined by spaces"""
    encoding = get_filesystem_encoding()
    return ' '.join(
        # We may get a filename encoded with 'encodeFilename'
        compat_shlex_quote(arg.decode(encoding) if isinstance(arg, bytes) else arg)
        for arg in args)
2253
2254
def smuggle_url(url, data):
    """
    Pass additional data in a URL for internal use.

    The data is JSON-encoded into the URL fragment. Data already smuggled
    into url is kept and takes precedence over the new values.

    @param data  Mapping to attach; it is not modified
    Returns the URL carrying the combined data.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a new dict rather than updating the caller's object in place
    merged = {**data, **idata}
    sdata = urllib.parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    """
    Split a URL produced by smuggle_url into (url, data)

    Returns (smug_url, default) when the URL carries no smuggled data.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    # The smuggled payload is expected to be the last fragment of the URL
    url, _, sdata = smug_url.rpartition('#')
    jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
2272
2273
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """
    Formats numbers with decimal suffixes like K, M, etc

    @param fmt     %-format taking the scaled number and the suffix
    @param factor  Ratio between consecutive suffixes; with 1024 the
                   binary suffixes (Ki, Mi, ...) are used
    Returns None if num is not a number or is negative.
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    # Clamp at 0 as well: for 0 < num < 1 the logarithm is negative, and a
    # negative exponent would index the suffix list from its end
    exponent = 0 if num == 0 else max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
2286
2287
def format_bytes(bytes):
    """Format a byte count with two decimals and a binary suffix; 'N/A' if that fails"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
2290
2291
def lookup_unit_table(unit_table, s, strict=False):
    """
    Parse "<number><unit>" and return the number scaled by the unit's multiplier

    @param unit_table  Mapping of unit name to multiplier
    @param strict      Require the whole string to match and only accept
                       NUMBER_RE as is; otherwise match at the start of s
                       and also accept "," as decimal separator
    Returns the rounded product, or None when s does not match.
    """
    if strict:
        number_re, matcher = NUMBER_RE, re.fullmatch
    else:
        number_re, matcher = NUMBER_RE.replace(R'\.', '[,.]'), re.match
    unit_alternatives = '|'.join(map(re.escape, unit_table))
    found = matcher(rf'(?P<num>{number_re})\s*(?P<unit>{unit_alternatives})\b', s)
    if found is None:
        return None

    value = float(found.group('num').replace(',', '.'))
    return round(value * unit_table[found.group('unit')])
2303
2304
def parse_bytes(s):
    """Parse a string indicating a byte quantity into an integer"""
    # No suffix means bytes; every following prefix is another factor of 1024
    multipliers = {'': 1}
    multipliers.update(
        (prefix, 1024 ** power) for power, prefix in enumerate('KMGTPEZY', start=1))
    return lookup_unit_table(multipliers, s.upper(), strict=True)
2310
2311
def parse_filesize(s):
    """
    Parse a file size such as "5 MiB" or "1.2kb" via lookup_unit_table

    Returns None for a None input.
    """
    if s is None:
        return None

    # (prefix letter, decimal unit name, binary unit name), in ascending order
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, start=1):
        lower = letter.lower()
        decimal, binary = 1000 ** power, 1024 ** power
        _UNIT_TABLE.update({
            f'{letter}iB': binary,
            f'{letter}B': decimal,
            f'{lower}B': binary,
            f'{letter}b': decimal,
            f'{lower}b': decimal,
            f'{decimal_name}bytes': decimal,
            f'{binary_name}bytes': binary,
        })

    return lookup_unit_table(_UNIT_TABLE, s)
2381
2382
def parse_count(s):
    """
    Parse a count such as "1,234", "1.5M" or "Views 12k" into a number

    Returns None for a None input or when no number is found.
    """
    if s is None:
        return None

    # Drop a leading non-numeric label that is followed by whitespace
    text = re.sub(r'^[^\d]+\s', '', s).strip()

    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand = 1000
    _UNIT_TABLE = {
        'k': thousand,
        'K': thousand,
        'm': thousand ** 2,
        'M': thousand ** 2,
        'kk': thousand ** 2,
        'KK': thousand ** 2,
        'b': thousand ** 3,
        'B': thousand ** 3,
    }

    scaled = lookup_unit_table(_UNIT_TABLE, text)
    if scaled is not None:
        return scaled

    # Fall back to a leading number followed by whitespace or the end
    leading_number = re.match(r'([\d,.]+)(?:$|\s)', text)
    return str_to_int(leading_number.group(1)) if leading_number else None
2410
2411
def parse_resolution(s, *, lenient=False):
    """
    Extract the resolution from a string

    Recognizes "<width>x<height>", "<height>p"/"<height>i" and "4k"/"8k",
    in that order of preference.

    @param lenient  Also accept a "<width>x<height>" that is directly
                    adjacent to letters or digits
    Returns a dict with "width"/"height" keys as available, possibly empty.
    """
    if s is None:
        return {}

    dimensions_re = (
        r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)' if lenient
        else r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')
    found = re.search(dimensions_re, s)
    if found:
        return {
            'width': int(found.group('w')),
            'height': int(found.group('h')),
        }

    found = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if found:
        return {'height': int(found.group(1))}

    found = re.search(r'\b([48])[kK]\b', s)
    if found:
        # The leading digit times 540 gives the height (4k -> 2160)
        return {'height': int(found.group(1)) * 540}

    return {}
2435
2436
def parse_bitrate(s):
    """Return the integer that precedes "kbps" in s.

    Returns None when s is not a str or contains no such number.
    """
    if isinstance(s, str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
2443
2444
def month_by_name(name, lang='en'):
    """ Return the 1-based number of the month called `name`.

    The name is looked up in MONTH_NAMES[lang]; an unknown `lang` falls
    back to the 'en' table. Returns None when the name is not in the table. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
2454
2455
def month_by_abbreviation(abbrev):
    """ Return the 1-based number of the month whose English name starts
        with the three characters `abbrev`, or None if there is none """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
2464
2465
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'

    An ampersand is left untouched when it starts one of the five predefined
    entities (amp, lt, gt, apos, quot) or a numeric character reference
    (&#NNN; or &#xHHH;). Numeric references may have any number of digits so
    that code points above U+FFFF (e.g. &#128512; / &#x1F600;) are preserved,
    but at least one digit is required.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]+;|#[0-9]+;)',
        '&amp;',
        xml_str)
2472
2473
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl().

    Silently does nothing when ctypes or 'libc.so.6' cannot be loaded, or
    when the loaded libc exposes no prctl symbol.
    """
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Passing the bytes themselves makes ctypes allocate len + 1 bytes and
    # append the NUL terminator. Sizing the buffer to exactly len(title)
    # leaves the string unterminated, so prctl() may read past its end.
    buf = ctypes.create_string_buffer(title.encode())
    try:
        # PR_SET_NAME = 15      Ref: /usr/include/linux/prctl.h
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
2499
2500
def remove_start(s, start):
    """Return s without the prefix `start`; s is returned unchanged if it is
    None or does not begin with `start`"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
2503
2504
def remove_end(s, end):
    """Return s without the suffix `end`; s is returned unchanged if it is
    None, if `end` is empty, or if s does not finish with `end`"""
    # An empty suffix must be special-cased: s[:-0] is s[:0], i.e. ''
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
2507
2508
def remove_quotes(s):
    """Strip one pair of matching surrounding quotes (" or ') from s.

    s is returned unchanged when it is None, shorter than two characters,
    or not enclosed in the same quote character on both sides.
    """
    if s is None or len(s) < 2:
        return s
    is_quoted = any(s[0] == q and s[-1] == q for q in ('"', "'"))
    return s[1:-1] if is_quoted else s
2516
2517
def get_domain(url):
    """
    Return the network location of `url` with a leading "www." removed,
    or None when it is empty.

    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    domain = remove_start(netloc, 'www.')
    return domain if domain else None
2524
2525
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading and
    trailing slashes (query string and fragment are not part of the path)"""
    trimmed_path = urllib.parse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
2529
2530
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that comes
    before any '?' or '#'.

    Raises AttributeError when `url` does not have that shape.
    """
    prefix = re.match(r'https?://[^?#]+/', url)
    return prefix.group()
2533
2534
def urljoin(base, path):
    """Join `path` onto `base`, tolerating bytes and invalid input.

    Returns:
      * None if `path` is empty or not a str/bytes
      * `path` itself if it is already absolute or protocol-relative
        ("scheme://..." or "//...")
      * None if `base` is not an http(s) or protocol-relative URL
      * urllib.parse.urljoin(base, path) otherwise
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    # Scheme characters per RFC 3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ).
    # The '-' is placed last in the class so it is a literal and not the range
    # '+'..'.', which would also admit ','.
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
2548
2549
class HEADRequest(urllib.request.Request):
    """urllib Request whose HTTP method is always HEAD"""

    def get_method(self):
        """Return the fixed method name, regardless of any request data"""
        return 'HEAD'
2553
2554
class PUTRequest(urllib.request.Request):
    """urllib Request whose HTTP method is always PUT"""

    def get_method(self):
        """Return the fixed method name, regardless of any request data"""
        return 'PUT'
2558
2559
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int and return `int(v) * invscale // scale`.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (missing attribute -> None). `default` is returned
    whenever the conversion raises ValueError, TypeError or OverflowError.
    """
    if v is not None and get_attr:
        v = getattr(v, get_attr, None)
    try:
        result = int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        result = default
    return result
2567
2568
def str_or_none(v, default=None):
    """Return str(v), or `default` when v is None"""
    if v is None:
        return default
    return str(v)
2571
2572
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    ints are returned as-is; for strings every ',', '.' and '+' is removed
    before conversion through int_or_none. Any other type gives None. """
    if isinstance(int_str, int):
        return int_str
    if not isinstance(int_str, str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
2580
2581
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float and return `float(v) * invscale / scale`.

    `default` is returned when v is None or the conversion raises
    ValueError or TypeError.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
2589
2590
def bool_or_none(v, default=None):
    """Return v only if it is an actual bool, `default` otherwise"""
    if isinstance(v, bool):
        return v
    return default
2593
2594
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a str,
    `default` otherwise"""
    if isinstance(v, str):
        return v.strip()
    return default
2597
2598
def url_or_none(url):
    """Return the whitespace-stripped url if it looks like a URL.

    Accepted are protocol-relative URLs ("//...") and the schemes matched
    by the pattern below (http(s), rtmp/rtsp variants, mms, ftp(s)).
    Anything else, including non-str or empty input, yields None.
    """
    if url and isinstance(url, str):
        candidate = url.strip()
        if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
            return candidate
    return None
2604
2605
def request_to_url(req):
    """Return the full URL of a urllib Request; any other value is
    returned unchanged"""
    return req.get_full_url() if isinstance(req, urllib.request.Request) else req
2611
2612
def strftime_or_none(timestamp, date_format, default=None):
    """Format a timestamp with strftime, returning `default` on any failure.

    @param timestamp    int/float: unix timestamp, interpreted in UTC
                        str: a date in YYYYMMDD form
                        anything else gives `default`
    @param date_format  strftime format. "%s" is expanded manually to the
                        integer unix timestamp so that it works on Windows
    @param default      Value returned when the input cannot be formatted,
                        including timestamps outside the supported range
    """
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Adding a timedelta to the epoch (instead of fromtimestamp(timestamp))
            # does not depend on the range the platform's C time functions accept
            datetime_object = (
                datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
                + datetime.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # OverflowError: out-of-range/infinite timestamps; OSError: platform time failures
        return default
2627
2628
def parse_duration(s):
    """Parse a textual duration and return it in seconds (as a number).

    Three notations are tried in order:
      1. Clock style: [[[DD:]HH:]MM:]SS[.ms] (a ':' before the fraction is
         accepted too), optionally followed by 'Z'
      2. Unit style: e.g. "1h 30m 5.5s", "PT1H30M", "3 days, 2 hours".
         Years, months and weeks are matched but do not contribute
      3. A single fractional amount of hours or minutes, e.g. "1.5 hours"

    Returns None if s is not a str, is blank, matches none of the notations,
    or a captured number is malformed (e.g. "1.2.3 hours").
    """
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if m:
        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    if ms:
        # The clock notation may separate the fraction with ':'
        ms = ms.replace(':', '.')
    try:
        return sum(float(part or 0) * mult for part, mult in (
            (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
    except ValueError:
        # [0-9.]+ in the last pattern also admits strings such as "1.2.3" or "."
        return None
2683
2684
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of the file's real extension.

    If expected_real_ext is given and differs from the actual extension,
    `ext` is simply appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return f'{filename}.{ext}'
    return f'{name}.{ext}{real_ext}'
2691
2692
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file's extension by `ext`.

    If expected_real_ext is given and differs from the actual extension,
    nothing is removed and `ext` is appended to the whole filename.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    return f'{stem}.{ext}'
2698
2699
def check_executable(exe, args=[]):
    """ Try to run `exe` with `args`; return `exe` if it could be launched
    and False if launching raised OSError.
    args can be a list of arguments for a short output (like -version) """
    try:
        Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    else:
        return exe
2708
2709
def _get_exe_version_output(exe, args):
    """Run `exe` with `args` and return its combined stdout/stderr as text,
    or False if launching raised OSError"""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        output, _, _ = Popen.run(
            [encodeArgument(exe)] + args, text=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        return False
    else:
        return output
2720
2721
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from a program's output.

    Returns group 1 of the first match of version_re (by default the token
    following the word "version"), or `unrecognized` when nothing matches.
    """
    assert isinstance(output, str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
2731
2732
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable as detected from
    the output of running it with `args`, or False if the executable
    could not be run or produced no output """
    out = _get_exe_version_output(exe, args)
    if not out:
        return False
    return detect_exe_version(out, version_re, unrecognized)
2739
2740
def frange(start=0, stop=None, step=1):
    """Float range

    Like range(), but start/stop/step may be floats (or infinite).
    With a single argument it counts from 0 up to that value.
    A zero step yields nothing."""
    if stop is None:
        start, stop = 0, start
    # Multiplying both sides by the sign makes one comparison serve both directions
    if not step:
        sign = 0
    elif step > 0:
        sign = 1
    else:
        sign = -1
    current = start
    while sign * current < sign * stop:
        yield current
        current += step
2749
2750
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable

    Items are pulled from the iterable only when needed and are kept in a cache.
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        self._iterable = iter(iterable)
        self._cache = _cache if _cache is not None else []
        self._reversed = reverse

    def __iter__(self):
        if not self._reversed:
            yield from self._cache
            for item in self._iterable:
                self._cache.append(item)
                yield item
            return
        # We need to consume the entire iterable to iterate in reverse
        yield from self.exhaust()

    def _exhaust(self):
        """Pull all remaining items into the cache and return the cache itself"""
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        items = self._exhaust()
        return items[::-1] if self._reversed else items[::1]

    @staticmethod
    def _reverse_index(x):
        # ~x maps index x counted from the front to the same position counted from the back
        return x if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')

        # We need to consume the entire iterable to be able to slice from the end
        # Obviously, never use this with infinite iterables
        needs_everything = (
            (start or 0) < 0 or (stop or 0) < 0
            or (start is None and step < 0)
            or (stop is None and step > 0))
        if needs_everything:
            self._exhaust()
        else:
            # Only fetch as many further items as the request can touch
            missing = max(start or 0, stop or 0) - len(self._cache) + 1
            if missing > 0:
                self._cache.extend(itertools.islice(self._iterable, missing))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Looking at a single item is enough; in reversed mode index -1 maps to the first cached item
        probe = -1 if self._reversed else 0
        try:
            self[probe]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        return len(self._exhaust())

    def __reversed__(self):
        # The new object shares both the iterable and the cache with this one
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
2838
2839
class PagedList:
    """Base class for lists whose items are fetched one page at a time.

    `pagefunc(pagenum)` must return an iterable with the items of that page.
    Subclasses implement _getslice() to define how pages are walked."""

    class IndexError(IndexError):
        pass

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Return the items of page `pagenum` as a list, using the cache when possible"""
        page_results = self._cache.get(pagenum)
        if page_results is None:
            if pagenum > self._pagecount:
                # Pages beyond the known page count are empty; do not fetch them
                page_results = []
            else:
                page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """Return the items in [start, end) as a list"""
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if entries:
            return entries[0]
        raise self.IndexError()
2878
2879
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range inside the current page
            startv = start % self._pagesize if firstid <= start < nextfirstid else 0
            endv = None
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % self._pagesize) + 1

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                # Remember the last page that could be fetched before re-raising
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if len(page_results) + startv < self._pagesize or end == nextfirstid:
                break
2919
2920
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        # Caching is always enabled for this variant
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        # First page to fetch and number of items to drop from its beginning
        start_page, skip_elems = divmod(start, self._pagesize)
        if end is None:
            end_page, only_more = self._pagecount, None
        else:
            end_page = min(self._pagecount, end // self._pagesize + 1)
            only_more = end - start  # number of items still to be produced

        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) >= only_more:
                    yield from page_results[:only_more]
                    break
                only_more -= len(page_results)
            yield from page_results
2945
2946
class PlaylistEntries:
    """Indexable view over the 'entries' of a playlist info dict.

    Indexing uses 1-based playlist positions and yields
    (playlist_index, entry) pairs; see __getitem__.
    """

    # Sentinel stored at positions for which no entry is available
    MissingEntry = object()
    # Becomes True once the end of the underlying entries has been reached
    is_exhausted = False

    def __init__(self, ydl, info_dict):
        """Capture the entries of info_dict.

        Raises EntryNotInPlaylist if info_dict has no 'entries'.
        If 'requested_entries' is present, the given entries are placed at
        those (1-based) positions and the remaining ones hold MissingEntry.
        """
        self.ydl = ydl

        # _entries must be assigned now since infodict can change during iteration
        entries = info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        requested_entries = info_dict.get('requested_entries')
        self.is_incomplete = requested_entries is not None
        if self.is_incomplete:
            assert self.is_exhausted
            # Sized to the highest requested position; unfilled slots stay MissingEntry
            self._entries = [self.MissingEntry] * max(requested_entries or [0])
            for i, entry in zip(requested_entries, entries):
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
        else:
            # Any other iterable is wrapped so that it can be indexed lazily
            self._entries = LazyList(entries)

    # One comma-separated segment of a playlist items specification:
    # [START][(":" or "-")[END][":"STEP]] where END may also be "inf"/"infinite"
    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Yield an int (single item) or a slice (range) for every
        comma-separated segment of `string`.

        Raises ValueError for empty segments, segments that do not match
        PLAYLIST_ITEMS_RE, and ranges with a step of zero.
        """
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')
            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            if int_or_none(step) == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            # The end goes through float_or_none so that "inf" is accepted
            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)

    def get_requested_items(self):
        """Yield (playlist_index, entry) for the items selected by the
        'playlist_items' / 'playliststart' / 'playlistend' params of ydl.

        Iteration ends when ydl._match_entry raises ExistingVideoReached or
        RejectedVideoReached for an entry (after that entry was yielded).
        """
        playlist_items = self.ydl.params.get('playlist_items')
        playlist_start = self.ydl.params.get('playliststart', 1)
        playlist_end = self.ydl.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if not entry:
                    continue
                try:
                    # TODO: Add auto-generated fields
                    self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    return

    def get_full_count(self):
        """Return the total number of entries if it is known without fetching
        anything further, otherwise None (implicitly)."""
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        elif isinstance(self._entries, InAdvancePagedList):
            # With one item per page, the page count equals the item count
            if self._entries._pagesize == 1:
                return self._entries._pagecount

    @functools.cached_property
    def _getter(self):
        """Function get_entry(i) returning the entry at 0-based index i.

        It raises self.IndexError when i is past the end of the entries and
        EntryNotInPlaylist when the position holds MissingEntry.
        """
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    entry = self.MissingEntry
                    # Only an incomplete playlist may be asked for positions beyond its list
                    if not self.is_incomplete:
                        raise self.IndexError()
                if entry is self.MissingEntry:
                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
                return entry
        else:
            def get_entry(i):
                try:
                    # The lookup is routed through the ydl class's _handle_extraction_exceptions
                    # wrapper; NOTE(review): presumably so that extraction errors raised while
                    # fetching lazily are handled like other extraction errors - confirm
                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
                except (LazyList.IndexError, PagedList.IndexError):
                    raise self.IndexError()
        return get_entry

    def __getitem__(self, idx):
        """Yield (playlist_index, entry) pairs for an int or slice of 1-based
        playlist positions. Negative positions count from the end, which
        requires computing len(self)."""
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        # The stop position is inclusive, so move it one step further in the iteration direction
        stop += [-1, 1][step > 0]

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                self.is_exhausted = True
                # Going forwards nothing can follow; going backwards lower indices may still exist
                if step > 0:
                    break
                continue
            yield i + 1, entry

    def __len__(self):
        # Counts by iterating over everything
        return len(tuple(self[:]))

    class IndexError(IndexError):
        pass
3079
3080
def uppercase_escape(s):
    """Replace every literal \\UXXXXXXXX escape (8 hex digits) in s by the
    character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _decode_match(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode_match, s)
3087
3088
def lowercase_escape(s):
    """Replace every literal \\uXXXX escape (4 hex digits) in s by the
    character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _decode_match(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode_match, s)
3095
3096
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986

    Characters in the safe set below (including '%', so existing escapes
    are not escaped again) are left as they are."""
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return urllib.parse.quote(s, safe=safe_chars)
3100
3101
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host part is IDNA-encoded; path, params, query and fragment are
    each passed through escape_rfc3986."""
    parts = urllib.parse.urlparse(url)
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    )
    return escaped.geturl()
3112
3113
def parse_qs(url, **kwargs):
    """Parse the query string of `url` into a dict of value lists.

    Keyword arguments are forwarded to urllib.parse.parse_qs."""
    query = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query, **kwargs)
3116
3117
def read_batch_urls(batch_fd):
    """Read URLs, one per line, from the file-like object batch_fd.

    A leading BOM and leading whitespace are removed from each line.
    Blank lines and lines starting with '#', ';' or ']' are skipped, and
    anything after a whitespace followed by '#' is treated as a comment.
    batch_fd is closed afterwards.

    Returns the list of remaining URLs.
    """
    def fixup(url):
        if not isinstance(url, str):
            url = url.decode('utf-8', 'replace')
        # The BOM either mis-decoded as three separate characters, or decoded properly
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        # (maxsplit is passed by keyword: the positional form is deprecated since Python 3.13)
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3135
3136
def urlencode_postdata(*args, **kargs):
    """Like urllib.parse.urlencode, but returns ASCII-encoded bytes"""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
3139
3140
def update_url_query(url, query):
    """Return `url` with its query parameters updated from `query`.

    Parameters already in the URL are replaced by those of the same name in
    `query`; an empty `query` returns the URL unchanged."""
    if not query:
        return url
    parts = urllib.parse.urlparse(url)
    params = urllib.parse.parse_qs(parts.query)
    params.update(query)
    new_query = urllib.parse.urlencode(params, True)
    return urllib.parse.urlunparse(parts._replace(query=new_query))
3149
3150
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Build a new request from `req` with some parts replaced.

    `headers` are merged over the existing ones, `data` and `url` replace
    the originals when truthy, and `query` is merged into the URL's query
    string. HEAD and PUT requests keep their method; a `timeout` attribute
    on the original is carried over."""
    req_headers = req.headers.copy()
    req_headers.update(headers or {})
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    # Requests with any other method become plain urllib requests
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), urllib.request.Request)
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3169
3170
3171 def _multipart_encode_impl(data, boundary):
3172     content_type = 'multipart/form-data; boundary=%s' % boundary
3173
3174     out = b''
3175     for k, v in data.items():
3176         out += b'--' + boundary.encode('ascii') + b'\r\n'
3177         if isinstance(k, str):
3178             k = k.encode()
3179         if isinstance(v, str):
3180             v = v.encode()
3181         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3182         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3183         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3184         if boundary.encode('ascii') in content:
3185             raise ValueError('Boundary overlaps with data')
3186         out += content
3187
3188     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3189
3190     return out, content_type
3191
3192
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value).
    Raises ValueError if an explicitly given boundary occurs in the data;
    a generated boundary is simply re-drawn until it does not.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            # Collision with the data: try again with a fresh random boundary
            boundary = None
3221
3222
def variadic(x, allowed_types=(str, bytes, dict)):
    """Return x itself if it is an iterable other than one of allowed_types,
    otherwise wrap it in a one-element tuple"""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
3225
3226
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value of d among the given key(s).

    None values are always skipped; other falsy values are skipped too
    unless skip_false_values is False. `default` is returned if no key
    gives a usable value."""
    for key in variadic(key_or_keys):
        val = d.get(key)
        if val is None:
            continue
        if val or not skip_false_values:
            return val
    return default
3232
3233
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn with (*args, **kwargs) and return the
    first result obtained.

    A function is skipped when it raises AttributeError, KeyError,
    TypeError, IndexError, ValueError or ZeroDivisionError, or when
    expected_type is given and its result is not of that type.
    Returns None if no function gives a usable result."""
    for func in funcs:
        try:
            result = func(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
    return None
3243
3244
def try_get(src, getter, expected_type=None):
    """Apply one getter, or each of several getters in turn, to `src` and
    return the first usable result as determined by try_call"""
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)
3247
3248
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict with the items of dct for which cndn(key, value)
    is true; by default, items whose value is None are dropped"""
    return dict(item for item in dct.items() if cndn(*item))
3251
3252
def merge_dicts(*dicts):
    """Merge dicts, giving priority to the earlier ones.

    For every key the first value that is not None wins. The only exception
    is an empty string, which a later str value may replace."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if k not in merged:
                if v is not None:
                    merged[k] = v
            elif isinstance(v, str) and merged[k] == '':
                merged[k] = v
    return merged
3261
3262
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as str: str objects are returned unchanged, anything
    else is decoded with str(string, encoding, errors)"""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
3265
3266
# Age limit, in years, for each US rating label.
# parse_age_limit() upper-cases its input before looking it up here.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
3274
3275
# Age limit, in years, for each "TV-..." rating label.
# parse_age_limit() builds its matching regex from the part of each key
# after the "TV-" prefix, so every key must start with those three characters.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
3284
3285
def parse_age_limit(s):
    """Convert an age rating to an age limit in years.

    Accepted input:
      * an int between 0 and 21 (bools are rejected)
      * a string of one or two digits, optionally followed by '+'
      * a key of US_RATINGS (case-insensitive)
      * a key of TV_PARENTAL_GUIDELINES, where the separator after "TV"
        may be '-', '_' or missing (case-insensitive)

    Returns None for anything else."""
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        return s if 0 <= s <= 21 else None
    if not isinstance(s, str):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    tv_rating = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), rating)
    return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)] if tv_rating else None
3302
3303
def strip_jsonp(code):
    """ Strip a JSONP wrapper, returning only the callback argument

    Handles an optional "window." prefix, an optional "name && name" guard,
    an optional trailing ";" and trailing "//" comments.
    If @code does not look like JSONP it is returned unchanged.
    """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
3312
3313
def js_to_json(code, vars={}, *, strict=False):
    """ Convert a JavaScript value/object literal into JSON-compatible text

    @param vars    dict of var, val pairs to substitute
    @param strict  Raise ValueError for values that cannot be converted,
                   instead of emitting them as quoted strings
    """
    STRING_QUOTES = '\'"'
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )
    # Escapes that mean the same thing in JSON and are kept verbatim
    JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'

    def process_escape(match):
        # group(1) is a bare double quote; group(2) is the character after a backslash
        escape = match.group(1) or match.group(2)
        if escape in JSON_PASSTHROUGH_ESCAPES:
            return Rf'\{escape}'
        if escape == 'x':
            # "\xNN" becomes "\u00NN"
            return R'\u00'
        if escape == '\n':
            # Escaped newline is removed entirely
            return ''
        return escape

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token in ('undefined', 'void 0'):
            return 'null'
        if token == ',' or token.startswith(('/*', '//', '!')):
            return ''

        if token[0] in STRING_QUOTES:
            inner = re.sub(r'(?s)(")|\\(.)', process_escape, token[1:-1])
            return f'"{inner}"'

        for regex, base in INTEGER_TABLE:
            int_match = re.match(regex, token)
            if int_match:
                number = int(int_match.group(1), base)
                # A number used as an object key must be quoted in JSON
                return f'"{number}":' if token.endswith(':') else str(number)

        if token in vars:
            return json.dumps(vars[token])

        if strict:
            raise ValueError(f'Unknown value: {token}')
        return f'"{token}"'

    def create_map(mobj):
        pairs = json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))
        return json.dumps(dict(pairs))

    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)

    return re.sub(rf'''(?sx)
        {STRING_RE}|
        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
        [0-9]+(?={SKIP_RE}:)|
        !+
        ''', fix_kv, code)
3377
3378
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function mapping a quality id to its index in @quality_ids,
    or to -1 if the id is not in the list.
    """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
3387
3388
# Names of the stages at which a postprocessor can run
# NOTE(review): meaning inferred from the names only; confirm against the code that consumes this tuple
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')


# Default output templates (%-style with named fields), keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types
# NOTE(review): the values look like the filename suffix used for that type
# (None where there is none); confirm against the callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# Verbose-regex template for one %-format specifier. It has two placeholders:
# {0} is the pattern for the mapping key and {1} the pattern for the conversion type.
# An even run of "%" (escaped percent signs) before the specifier is captured as "prefix"
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''


# Conversion type characters of %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3428
3429
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than @length are cut so that the result, including the
    trailing "...", is @length characters long. None is passed through.
    """
    ELLIPSES = '...'
    if s is None or len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
3438
3439
def version_tuple(v):
    """ Split a version string on "." and "-" into a tuple of ints

    Raises ValueError if any component is not an integer.
    """
    return tuple(map(int, re.split(r'[-.]', v)))
3442
3443
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether @version is lower than @limit

    If @version is empty, or either version cannot be parsed by version_tuple,
    the answer is the opposite of @assume_new.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
3451
3452
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Imported here rather than at module level
    from .update import is_non_updateable

    non_updateable = is_non_updateable()
    return not non_updateable
3459
3460
def args_to_str(args):
    """ Get a short string representation for a subprocess command

    Each argument is passed through compat_shlex_quote and the results are
    joined with spaces.
    """
    quoted = (compat_shlex_quote(arg) for arg in args)
    return ' '.join(quoted)
3464
3465
def error_to_compat_str(err):
    """ Return str(@err) """
    message = str(err)
    return message
3468
3469
def error_to_str(err):
    """ Format @err as "<exception class name>: <message>" """
    error_type = type(err).__name__
    return f'{error_type}: {err}'
3472
3473
def mimetype2ext(mt):
    """ Guess a file extension from a MIME type (e.g. a Content-Type value)

    Parameters after ";" are ignored. Matching is case-insensitive, since
    MIME type and subtype names are not case sensitive (RFC 2045, section 5.1).

    @param mt  MIME type string, or None
    @returns   Extension without the leading dot; None if @mt is None.
               A subtype that is not known is returned lowercased,
               with any "+" replaced by "."
    """
    if mt is None:
        return None

    # Drop parameters such as "; charset=utf-8" and normalize the case once,
    # so that every lookup below behaves the same way
    mt = mt.partition(';')[0].strip().lower()

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }

    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }

    _, _, subtype = mt.rpartition('/')
    ext = SUBTYPE_MAP.get(subtype)
    if ext is not None:
        return ext

    # Structured syntax suffixes ("something+json") decide the extension
    # when the subtype itself is not known
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }

    _, _, suffix = subtype.partition('+')
    ext = SUFFIX_MAP.get(suffix)
    if ext is not None:
        return ext

    return subtype.replace('+', '.')
3536
3537
def ext2mimetype(ext_or_url):
    """ Guess the MIME type for a file extension, filename or URL

    Returns None for empty input or when the type cannot be guessed.
    """
    if not ext_or_url:
        return None
    # A value without a dot is treated as a bare extension and given a dummy filename
    filename = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    mimetype, _encoding = mimetypes.guess_type(filename)
    return mimetype
3544
3545
def parse_codecs(codecs_str):
    """ Split a comma-separated codecs string into codec fields

    @returns  {} for empty input. Otherwise a dict with "vcodec" and "acodec"
              ("none" when absent), "dynamic_range" and, if a subtitle codec
              was found, "scodec". If no codec is recognised but exactly two
              were given, they are taken as vcodec and acodec respectively.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
                    'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    SUBTITLE_CODECS = ('stpp', 'wvtt')

    raw_codecs = codecs_str.strip().strip(',').split(',')
    split_codecs = [codec for codec in map(str.strip, raw_codecs) if codec]

    vcodec = acodec = scodec = hdr = None
    for full_codec in split_codecs:
        # Leading zeros in numbers are dropped before comparing (e.g. "vp09" -> "vp9")
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        family = parts[0]
        if family in VIDEO_CODECS:
            # Only the first video codec is used
            if vcodec:
                continue
            vcodec = full_codec
            if family in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif family == 'av1' and traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        elif family in SUBTITLE_CODECS:
            scodec = scodec or full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')

    if not (vcodec or acodec or scodec):
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
        return {}

    result = {
        'vcodec': vcodec or 'none',
        'acodec': acodec or 'none',
        'dynamic_range': hdr,
    }
    if scodec is not None:
        result['scodec'] = scodec
    return result
3586
3587
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """ Choose an extension for the combination of the given video and audio streams

    @param vcodecs, vexts  Codecs and extensions of the video streams (same length)
    @param acodecs, aexts  Codecs and extensions of the audio streams (same length)
    @param preferences     Extensions to consider, in order. mkv is allowed
                           when this is empty or contains "mkv"

    Selection order:
        1. "mkv" if it is allowed and there is more than one stream of either kind
        2. the first candidate whose COMPATIBLE_CODECS set holds both the first
           video codec and the first audio codec
        3. the first candidate compatible with all of the streams' extensions
        4. "mkv" if allowed, else the last preference
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    mkv_allowed = not preferences or 'mkv' in preferences
    if mkv_allowed and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # First codec only, reduced to its name before the first "." with all zeros removed
        return try_get(codecs, lambda x: x[0].split('.')[0].replace('0', ''))

    wanted_codecs = (sanitize_codec(vcodecs), sanitize_codec(acodecs))
    for ext in preferences or COMPATIBLE_CODECS.keys():
        if ext == 'mkv' or COMPATIBLE_CODECS.get(ext, set()).issuperset(wanted_codecs):
            return ext

    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    for ext in preferences or vexts:
        if ext == 'mkv':
            return ext
        current_exts = {ext, *vexts, *aexts}
        if current_exts == {ext}:
            return ext
        if any(ext_set.issuperset(current_exts) for ext_set in COMPATIBLE_EXTS):
            return ext

    if mkv_allowed:
        return 'mkv'
    return preferences[-1]
3626
3627
def urlhandle_detect_ext(url_handle):
    """ Detect the file extension from the headers of @url_handle

    The filename of a Content-Disposition attachment is preferred;
    otherwise the Content-Type is mapped through mimetype2ext.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = mobj and determine_ext(mobj.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'))
3640
3641
def encode_data_uri(data, mime_type):
    """ Build a base64 "data:" URI for the bytes @data with the given @mime_type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3644
3645
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked if no limit is set, or if the content is available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
3654
3655
# List of known byte-order-marks (BOM)
# The UTF-32-LE mark must stay ahead of UTF-16-LE, since the latter is a prefix of it
BOMS = [
    (b'\xef\xbb\xbf', 'utf-8'),
    (b'\x00\x00\xfe\xff', 'utf-32-be'),
    (b'\xff\xfe\x00\x00', 'utf-32-le'),
    (b'\xff\xfe', 'utf-16-le'),
    (b'\xfe\xff', 'utf-16-be'),
]


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Leading BOMs are stripped and select the encoding (default utf-8).
    Returns a match object if the decoded text starts with optional
    whitespace followed by "<", else None.
    """

    encoding = 'utf-8'
    for bom, bom_encoding in BOMS:
        bom_length = len(bom)
        while first_bytes.startswith(bom):
            first_bytes = first_bytes[bom_length:]
            encoding = bom_encoding

    text = first_bytes.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
3675
3676
def determine_protocol(info_dict):
    """ Return the protocol of a format

    An explicit "protocol" in @info_dict wins. Otherwise it is derived from
    the sanitized URL: its rtmp/mms/rtsp prefix, then its extension
    (m3u8, f4m), and finally its URL scheme.
    """
    declared = info_dict.get('protocol')
    if declared is not None:
        return declared

    url = sanitize_url(info_dict['url'])
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
    if ext == 'f4m':
        return 'f4m'

    return urllib.parse.urlparse(url).scheme
3697
3698
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a tab character will be right aligned

    @param delim       String repeated to draw a separator line under the header
    @param extra_gap   Additional spaces between columns
    @param hide_empty  Drop the columns in which every data cell has zero width
    """
    def visible_width(text):
        return len(remove_terminal_sequences(text).replace('\t', ''))

    def column_widths(rows):
        return [max(visible_width(str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, keep):
        # Cells beyond the end of `keep` are always retained
        return [cell for wanted, cell in itertools.zip_longest(keep, row, fillvalue=True) if wanted]

    # A zero width is falsy, so the widths of the data columns double as the keep-mask
    keep = column_widths(data) if hide_empty else []
    header_row = keep_columns(header_row, keep)
    data = [keep_columns(row, keep) for row in data]

    table = [header_row, *data]
    max_lens = column_widths(table)
    extra_gap += 1
    if delim:
        delim_row = [delim * (ml + extra_gap) for ml in max_lens]
        delim_row[-1] = delim_row[-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
        table = [header_row, delim_row, *data]

    lines = []
    for row in table:
        cells = []
        for pos, text in enumerate(map(str, row)):
            padding = max_lens[pos] - visible_width(text)
            if '\t' in text:
                # The padding goes in place of the tab, pushing the rest to the right
                cells.append(text.replace('\t', ' ' * padding) + ' ' * extra_gap)
            else:
                cells.append(text + ' ' * (padding + extra_gap))
        lines.append(''.join(cells).rstrip())
    return '\n'.join(lines)
3729
3730
3731 def _match_one(filter_part, dct, incomplete):
3732     # TODO: Generalize code with YoutubeDL._build_format_filter
3733     STRING_OPERATORS = {
3734         '*=': operator.contains,
3735         '^=': lambda attr, value: attr.startswith(value),
3736         '$=': lambda attr, value: attr.endswith(value),
3737         '~=': lambda attr, value: re.search(value, attr),
3738     }
3739     COMPARISON_OPERATORS = {
3740         **STRING_OPERATORS,
3741         '<=': operator.le,  # "<=" must be defined above "<"
3742         '<': operator.lt,
3743         '>=': operator.ge,
3744         '>': operator.gt,
3745         '=': operator.eq,
3746     }
3747
3748     if isinstance(incomplete, bool):
3749         is_incomplete = lambda _: incomplete
3750     else:
3751         is_incomplete = lambda k: k in incomplete
3752
3753     operator_rex = re.compile(r'''(?x)
3754         (?P<key>[a-z_]+)
3755         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3756         (?:
3757             (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3758             (?P<strval>.+?)
3759         )
3760         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3761     m = operator_rex.fullmatch(filter_part.strip())
3762     if m:
3763         m = m.groupdict()
3764         unnegated_op = COMPARISON_OPERATORS[m['op']]
3765         if m['negation']:
3766             op = lambda attr, value: not unnegated_op(attr, value)
3767         else:
3768             op = unnegated_op
3769         comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3770         if m['quote']:
3771             comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3772         actual_value = dct.get(m['key'])
3773         numeric_comparison = None
3774         if isinstance(actual_value, (int, float)):
3775             # If the original field is a string and matching comparisonvalue is
3776             # a number we should respect the origin of the original field
3777             # and process comparison value as a string (see
3778             # https://github.com/ytdl-org/youtube-dl/issues/11082)
3779             try:
3780                 numeric_comparison = int(comparison_value)
3781             except ValueError:
3782                 numeric_comparison = parse_filesize(comparison_value)
3783                 if numeric_comparison is None:
3784                     numeric_comparison = parse_filesize(f'{comparison_value}B')
3785                 if numeric_comparison is None:
3786                     numeric_comparison = parse_duration(comparison_value)
3787         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3788             raise ValueError('Operator %s only supports string values!' % m['op'])
3789         if actual_value is None:
3790             return is_incomplete(m['key']) or m['none_inclusive']
3791         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3792
3793     UNARY_OPERATORS = {
3794         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3795         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3796     }
3797     operator_rex = re.compile(r'''(?x)
3798         (?P<op>%s)\s*(?P<key>[a-z_]+)
3799         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3800     m = operator_rex.fullmatch(filter_part.strip())
3801     if m:
3802         op = UNARY_OPERATORS[m.group('op')]
3803         actual_value = dct.get(m.group('key'))
3804         if is_incomplete(m.group('key')) and actual_value is None:
3805             return True
3806         return op(actual_value)
3807
3808     raise ValueError('Invalid filter part %r' % filter_part)
3809
3810
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.
    @returns           Whether the filter passes
    @param filter_str  Conditions joined by "&"; all of them must pass.
                       A literal "&" inside a condition is written as "\\&"
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct, incomplete):
            return False
    return True
3821
3822
def match_filter_func(filters):
    """ Build a match function from one or more filter strings

    Returns None if @filters is empty. Otherwise returns a function
    f(info_dict, incomplete=False) which returns
        - a "does not pass filter" message if none of the filters pass
        - NO_DEFAULT if the filters included "-" and incomplete is falsy
        - None otherwise
    An entry passes if any one filter passes, or if "-" was the only filter.
    """
    if not filters:
        return None
    filters = set(variadic(filters))

    # "-" is not a filter itself; it only sets the interactive flag
    interactive = '-' in filters
    filters.discard('-')

    def _match_func(info_dict, incomplete=False):
        if filters and not any(match_str(f, info_dict, incomplete) for f in filters):
            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
        return NO_DEFAULT if interactive and not incomplete else None
    return _match_func
3840
3841
class download_range_func:
    """ Callable yielding the sections of a video that should be downloaded

    @param chapters  Regexes; every chapter whose title matches one is selected
    @param ranges    (start, end) pairs selected as given
    """

    def __init__(self, chapters, ranges):
        self.chapters = chapters
        self.ranges = ranges

    def __call__(self, info_dict, ydl):
        # With nothing requested, a single empty section is yielded
        if not (self.ranges or self.chapters):
            yield {}

        if info_dict.get('chapters'):
            warning = 'There are no chapters matching the regex'
        else:
            warning = 'Cannot match chapters since chapter information is unavailable'

        for regex in self.chapters or []:
            for index, chapter in enumerate(info_dict.get('chapters') or []):
                if not re.search(regex, chapter['title']):
                    continue
                warning = None
                yield {**chapter, 'index': index}
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        for start, end in self.ranges or []:
            yield {'start_time': start, 'end_time': end}

    def __eq__(self, other):
        if not isinstance(other, download_range_func):
            return False
        return self.chapters == other.chapters and self.ranges == other.ranges
3865
3866
def parse_dfxp_time_expr(time_expr):
    """ Parse a DFXP/TTML time expression into seconds (float)

    Accepts a number with an optional "s" suffix, or a clock value
    "H:MM:SS" whose fraction may follow either "." or ":".
    Returns None for empty or unsupported input.
    """
    if not time_expr:
        return None

    offset = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
    return None
3878
3879
def srt_subtitles_timecode(seconds):
    """ Format @seconds as an SRT timecode in the layout "HH:MM:SS,mmm" """
    # presumably (hours, minutes, seconds, milliseconds); verify against timetuple_from_msec
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
3882
3883
def ass_subtitles_timecode(seconds):
    """ Format @seconds as an ASS timecode in the layout "H:MM:SS.cc" """
    timetuple = timetuple_from_msec(seconds * 1000)
    # The last field is written in centiseconds rather than milliseconds
    centiseconds = timetuple.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*timetuple[:-1], centiseconds)
3887
3888
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT

    Only the <p> elements that have a "begin" time and either an "end" or
    a "dur" are converted. The styling properties listed in SUPPORTED_STYLING
    are rendered using <b>, <i>, <u> and <font> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document has no <p> elements
    '''
    # (current namespace, [legacy namespaces that are rewritten to it])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The "tts:" attributes that are read; everything else is ignored
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # presumably expands "prefix:name" paths using ns_map; verify against xpath_with_ns
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled in by the resolution loop below
    styles = {}
    # Style taken from <body>/<div>; the starting point for every element's style
    default_style = {}

    # Target object for XMLParser (see parse_node): receives start/end/data
    # events for one <p> element and builds its SRT text in _out
    class TTMLPElementParser:
        _out = ''
        # NOTE(review): these two lists are class attributes that are mutated in place,
        # so they are shared by all instances (one instance is created per paragraph).
        # Confirm whether carrying this state over between paragraphs is intended
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                # Output tags opened for this element, to be closed in end()
                unclosed_elements = []
                # Effective style: default style, then the referenced style, then inline attributes
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties that are already in effect with the same value
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        # All font attributes are emitted in a single <font> tag
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    # Record the style now in effect: the enclosing one plus this element's
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                # Close in the reverse order of opening
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                # NOTE(review): start() pushes an applied style whenever the element has
                # any style, but it is popped here only if a tag was opened as well
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and re-parse it with the target above;
        # parser.close() returns the value of target.close(), i.e. the built text
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Rewrite the legacy namespaces so that only the current ones need to be handled
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    # Fall back to un-namespaced <p> elements
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve the styles. A style may inherit from a parent that is defined
    # after it, so the pass is repeated while any parent is still unresolved.
    # NOTE(review): if a referenced parent never ends up in `styles` (undefined id,
    # or a parent without any supported property), `repeat` is set on every pass
    # and this loop does not appear to terminate; confirm
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # The style referenced by <body> and then <div> becomes the default style
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index counts every paragraph, including the ones skipped below
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end time: derive it from the duration, or skip the paragraph
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4051
4052
def cli_option(params, command_option, param, separator=None):
    """ Build the command line arguments for an option that takes a value

    @param params          Mapping the value is looked up in
    @param command_option  Option name, e.g. "--proxy"
    @param param           Key of the value in @params
    @param separator       If given, a single "option<separator>value" argument is
                           produced instead of two separate arguments
    @returns               List of arguments; empty if the value is missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
4058
4059
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the command line arguments for a boolean option

    The value of @param in @params must be True, False or None. It is turned
    into @true_value / @false_value and formatted by cli_option;
    None (or a missing key) results in no arguments.
    """
    flag = params.get(param)
    assert flag in (True, False, None)
    value_map = {True: true_value, False: false_value}
    return cli_option(value_map, command_option, flag, separator)
4064
4065
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] if the value of @param in @params equals @expected_value, else [] """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4068
4069
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """ Look up extra command line arguments in @argdict

    @param argdict     dict mapping (lowercase) keys to lists of arguments.
                       A list/tuple is accepted for backward compatibility and is
                       returned as-is if @use_compat, else treated as None
    @param keys        List/tuple of keys, tried in order. An entry may group several
                       keys; the arguments of all keys present in a group are concatenated
    @param default     Returned if @argdict is None or none of the keys are present
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        candidates = (argdict.get(key.lower()) for key in variadic(key_list))
        arg_list = [args for args in candidates if args is not None]
        if arg_list:
            return list(itertools.chain.from_iterable(arg_list))
    return default
4088
4089
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """ Build the list of lookup keys for @main_key/@exe and query cli_configuration_args

    The root key is "<exe>" if it equals @main_key, else "<main_key>+<exe>"
    (both lowercased). Each of @keys is a suffix appended to the root key.
    Only if the bare root key is among the resulting keys are the
    (main_key, exe) group and "default" added as fallbacks; otherwise
    @use_compat is turned off.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    root_key = f'{main_key}+{exe}' if main_key != exe else exe
    lookup_keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in lookup_keys:
        use_compat = False
    else:
        if main_key != exe:
            lookup_keys.append((main_key, exe))
        lookup_keys.append('default')
    return cli_configuration_args(argdict, lookup_keys, default, use_compat)
4101
4102
class ISO639Utils:
    """Conversions between two-letter and three-letter language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of `code` are looked up, so anything
        after them (e.g. the region in 'en-US') is ignored.
        Returns None for codes missing from the table.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        When several two-letter codes share the same three-letter code, the
        one listed first in the table is returned.
        Returns None for codes missing from the table.
        """
        candidates = (
            short_name for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
4306
4307
class ISO3166Utils:
    """Lookup of country names by their two-letter country code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the full country name

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
4569
4570
class GeoUtils:
    """Helpers for picking IPv4 addresses that belong to a given country"""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random dotted-quad IPv4 address from a country or CIDR block

        @param code_or_block  either a two-character country code (looked up
                              case-insensitively in the table above) or an
                              'address/prefix-length' block
        @returns              the address as a string, or None when a
                              two-character code has no entry in the table
        """
        block = code_or_block
        if len(code_or_block) == 2:
            # Any two-character argument is treated as a country code
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_length = block.split('/')
        (lowest,) = struct.unpack('!L', socket.inet_aton(network))
        # Setting every host bit yields the highest address of the block
        host_mask = 0xffffffff >> int(prefix_length)
        highest = lowest | host_mask
        chosen = random.randint(lowest, highest)
        return str(socket.inet_ntoa(struct.pack('!L', chosen)))
4829
4830
class PerRequestProxyHandler(urllib.request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden per request

    A request carrying a 'Ytdl-request-proxy' header uses that value instead
    of the handler-wide proxy; the header is removed before the request goes on.
    """

    def __init__(self, proxies=None):
        # Install http/https openers that default to the '__noproxy__' marker,
        # so proxy_open() runs (and can honour a per-request proxy) for these
        # schemes. The base initializer is called afterwards with `proxies`.
        for scheme in ('http', 'https'):
            setattr(
                self, f'{scheme}_open',
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open: meth(r, proxy, type))
        urllib.request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Apply `proxy` (or the per-request override) to `req`

        Returns None when no proxy is used or when a SOCKS proxy was recorded
        in the 'Ytdl-socks-proxy' header; otherwise defers to
        urllib.request.ProxyHandler.proxy_open.
        """
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = per_request_proxy

        if proxy == '__noproxy__':
            # No Proxy
            return None

        proxy_scheme = urllib.parse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS is not handled here: the proxy is only recorded on the
            # request, and yt-dlp's http/https handlers wrap the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        return urllib.request.ProxyHandler.proxy_open(self, req, proxy, type)
4854
4855
4856 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4857 # released into Public Domain
4858 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4859
def long_to_bytes(n, blocksize=0):
    """Convert a non-negative integer to its big-endian byte string.

    The result has no leading zero bytes, except that zero (and any
    non-positive input) yields a single zero byte.

    If optional blocksize is given and greater than zero, the front of the
    byte string is padded with binary zeros so that its length is a multiple
    of blocksize.
    """
    n = int(n)
    # Peel off 32 bits at a time; words are collected least-significant first
    words = []
    while n > 0:
        words.append(struct.pack('>I', n & 0xffffffff))
        n >>= 32
    # Drop the zero bytes that the fixed 4-byte packing put in front;
    # nothing is left only when no word was produced at all
    s = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    # Pad back up to a whole number of blocks if requested
    remainder = len(s) % blocksize if blocksize > 0 else 0
    if remainder:
        s = b'\000' * (blocksize - remainder) + s
    return s
4888
4889
def bytes_to_long(s):
    """Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes().
    An empty byte string yields 0.
    """
    remainder = len(s) % 4
    if remainder:
        # Left-pad with zeros so the data splits into whole 32-bit words
        s = b'\000' * (4 - remainder) + s
    value = 0
    for offset in range(0, len(s), 4):
        (word,) = struct.unpack('>I', s[offset:offset + 4])
        value = (value << 32) | word
    return value
4905
4906
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The bytes of `data` are reversed before being read as one big hexadecimal
    number, which is then raised to `exponent` modulo `modulus`.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    reversed_hex = binascii.hexlify(data[::-1])
    message = int(reversed_hex, 16)
    return format(pow(message, exponent, modulus), 'x')
4922
4923
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where the
    padding consists of at least 8 random non-zero values.

    @param {int[]} data        input data (any sequence of ints)
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for 11 bytes of framing
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding must not contain a zero: the first zero after the [0, 2]
    # header marks where the data starts, so a zero inside the padding would
    # make the receiver cut the message at the wrong place
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
4937
4938
4939 def _base_n_table(n, table):
4940     if not table and not n:
4941         raise ValueError('Either table or n must be specified')
4942     table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4943
4944     if n and n != len(table):
4945         raise ValueError(f'base {n} exceeds table length {len(table)}')
4946     return table
4947
4948
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string

    @param num     non-negative integer to convert
    @param n       base; the digit table is cut down to its first n entries
    @param table   digits to use (defaults to 0-9, a-z, A-Z)
    @returns       the digits of num, most significant first
    @raises ValueError  if num is negative, or if n/table are invalid
    """
    if num and num < 0:
        # Floor division never brings a negative number to 0 (it settles at
        # -1), so the conversion loop below would never terminate
        raise ValueError(f'Cannot encode negative number {num}')

    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    digits = []  # collected least-significant first
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
4960
4961
def decode_base_n(string, n=None, table=None):
    """Convert given base-n string to int

    The base is the number of distinct digits in the table selected by
    `n`/`table`. A character that is not in the table raises KeyError.
    """
    digit_values = {}
    for position, symbol in enumerate(_base_n_table(n, table)):
        digit_values[symbol] = position

    base = len(digit_values)
    value = 0
    for symbol in string:
        value = value * base + digit_values[symbol]
    return value
4969
4970
def decode_base(value, digits):
    """Deprecated: emits a deprecation warning, then calls decode_base_n(value, table=digits)"""
    message = (
        f'{__name__}.decode_base is deprecated and may be removed '
        f'in a future version. Use {__name__}.decode_base_n instead')
    deprecation_warning(message)
    return decode_base_n(value, table=digits)
4975
4976
def decode_packed_codes(code):
    """Expand the packed script found in `code` by PACKED_CODES_RE

    The match supplies the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list. Every word of the obfuscated code is the
    base-n encoding of an index into that list and is replaced by the symbol
    at that index (or kept as is when that symbol is empty).
    """
    packed, radix, remaining, words = re.search(PACKED_CODES_RE, code).groups()
    radix, remaining = int(radix), int(remaining)
    words = words.split('|')

    replacements = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        # An empty symbol means the token stands for itself
        replacements[token] = words[remaining] or token

    def _substitute(match):
        return replacements[match.group(0)]

    return re.sub(r'\b(\w+)\b', _substitute, packed)
4993
4994
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to `s`

    Every character found in `alphabet` is replaced by the one `shift`
    positions further on (wrapping around); other characters are kept.
    A shift of 0 returns `s` unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
5002
5003
def rot47(s):
    """Apply a Caesar shift of 47 over the printable ASCII characters '!' to '~'"""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5006
5007
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict

    Values may be double-quoted (and may then contain commas); the quotes
    are stripped from the result. Keys consist of A-Z, 0-9 and '-'.
    When a key occurs more than once, the last value wins.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: value[1:-1] if value.startswith('"') else value
        for key, value in pairs
    }
5015
5016
def urshift(val, n):
    """Shift `val` right by `n` bits, treating negative values as unsigned 32-bit"""
    if val < 0:
        # Map the negative value onto its 32-bit two's-complement equivalent
        val += 0x100000000
    return val >> n
5019
5020
5021 # Based on png2str() written by @gdkchan and improved by @yokrysty
5022 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG data into rows of colour values

    The image data is read as three bytes per pixel; no other pixel layout
    is handled.

    @param png_data  content of the PNG file as bytes
    @returns         (width, height, pixels), where pixels holds one list per
                     row with width * 3 values in the range 0-255
    @raises OSError  if the signature or the leading IHDR chunk is missing,
                     or if there is no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or png_data[12:16] != b'IHDR':
        raise OSError('Not a valid PNG file.')

    formats_by_size = {1: '>B', 2: '>H', 4: '>I'}

    def _to_int(raw):
        return struct.unpack(formats_by_size[len(raw)], raw)[0]

    # Walk the chunk list: 4-byte length, 4-byte type, payload, 4-byte CRC
    chunks = []
    offset, total = 8, len(png_data)
    while offset < total:
        size = _to_int(png_data[offset:offset + 4])
        kind = png_data[offset + 4:offset + 8]
        payload = png_data[offset + 8:offset + 8 + size]
        chunks.append((kind, payload))
        offset += 12 + size  # the CRC is skipped, not verified

    # The check above guarantees that the first chunk is IHDR
    ihdr = chunks[0][1]
    width = _to_int(ihdr[:4])
    height = _to_int(ihdr[4:8])

    idat = b''.join(payload for kind, payload in chunks if kind == b'IDAT')
    if not idat:
        raise OSError('Unable to read PNG data.')

    scanlines = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for _ in range(height):
        # Each scanline is one filter-type byte followed by `stride` data bytes
        row_start = len(pixels) * (1 + stride)
        filter_type = scanlines[row_start]
        above = pixels[-1] if pixels else None

        row = []
        pixels.append(row)

        for col in range(stride):
            value = scanlines[1 + row_start + col]
            # Neighbours are the already decoded values of the same channel
            left = row[col - 3] if col > 2 else 0
            up = above[col] if above is not None else 0

            if filter_type == 1:  # Sub
                predictor = left
            elif filter_type == 2:  # Up
                predictor = up
            elif filter_type == 3:  # Average
                predictor = (left + up) >> 1
            elif filter_type == 4:  # Paeth
                upper_left = above[col - 3] if col > 2 and above is not None else 0
                estimate = left + up - upper_left

                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
            else:  # None (or an unrecognised filter type): value is kept as is
                predictor = 0

            row.append((value + predictor) & 0xff)

    return width, height, pixels
5126
5127
def write_xattr(path, key, value):
    """
    Write the extended attribute `key` with the bytes `value` to the file at `path`.

    Strategies are tried in order: an NTFS alternate data stream when
    `compat_os_name` is 'nt', then the imported `xattr` module, then the
    `setfattr`/`xattr` command-line tools.

    @raises XAttrMetadataError      If the write itself fails
    @raises XAttrUnavailableError   If neither a module nor an executable is available
    """
    if compat_os_name == 'nt':
        # Windows: Write xattrs to NTFS Alternate Data Streams:
        # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as stream:
                stream.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use xattrs/pyxattrs modules
    if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        setxattr = xattr.set if version_tuple(xattr.__version__) >= (0, 5, 0) else None
    elif xattr:
        setxattr = xattr.setxattr
    else:
        setxattr = None

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    if check_executable('setfattr', ['--version']):
        exe = 'setfattr'
    elif check_executable('xattr', ['-h']):
        exe = 'xattr'
    else:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The executables take the value as a text argument
    value = value.decode()
    if exe == 'xattr':
        cmd = [exe, '-w', key, value, path]
    else:
        cmd = [exe, '-n', key, '-v', value, path]

    try:
        _, stderr, returncode = Popen.run(
            cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
5177
5178
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive).

    @returns    A dict mapping the three given field names to the year,
                month and day of that date, each as a decimal string.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip((year_field, month_field, day_field), map(str, parts)))
5189
5190
# Templates for internet shortcut files, which are plain text files.
# Each template is a %-style format string with named placeholders:
# all of them take `url`, and the desktop template additionally takes `filename`.

# "[InternetShortcut]" INI-style shortcut
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# Apple property list (plist) shortcut
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# "[Desktop Entry]" shortcut of type "Link"
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Lookup of the templates above by short name
# (presumably the file extension of the shortcut - confirm against callers)
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
5222
5223
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port 80 is dropped only for the `http` scheme, where it is the default.
    For any other scheme it is kept, since removing it would change the target of the URI.
    IRIs without a hostname (e.g. relative references) are supported; their network location is left empty.

    @param iri      The IRI as a `str`
    @returns        The equivalent ASCII-only URI as a `str`
    @raises ValueError  If the network location contains a bracket (IPv6 literals are unsupported)
    """

    iri_parts = urllib.parse.urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host part; there is nothing to encode in that case
    hostname = iri_parts.hostname
    if hostname:
        net_location += hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    port = iri_parts.port
    is_default_http_port = port == 80 and iri_parts.scheme == 'http'
    if port is not None and not is_default_http_port:
        net_location += ':' + str(port)

    return urllib.parse.urlunparse(
        (iri_parts.scheme,
            net_location,

            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5266
5267
def to_high_limit_path(path):
    """
    Return `path` in a form that is not subject to the MAX_PATH limit on Windows.

    On 'win32' and 'cygwin' the absolute path is prefixed with `\\\\?\\`;
    on every other platform `path` is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    absolute = os.path.abspath(path)
    return '\\\\?\\' + absolute
5274
5275
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """
    Look up `field` in `obj` with `traverse_obj` and format the value with `template`.

    @param field        Path(s) passed on to `traverse_obj` (wrapped in `variadic`)
    @param template     %-style template that receives `func(value)`
    @param ignore       Value(s) for which `default` is returned instead.
                        When not given, every falsy value except 0 is ignored
    @param default      What to return for an ignored value
    @param func         Applied to the value before formatting
    """
    value = traverse_obj(obj, *variadic(field))

    if ignore is NO_DEFAULT:
        # 0 is a meaningful value even though it is falsy
        skipped = not value and value != 0
    else:
        skipped = value in variadic(ignore)

    return default if skipped else template % func(value)
5281
5282
def clean_podcast_url(url):
    """
    Remove every occurrence of the known podcast tracking/analytics
    redirect prefixes from `url` and return the remaining URL.
    """
    tracking_prefix_re = re.compile(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''')
    return tracking_prefix_re.sub('', url)
5298
5299
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Generate a random version 4 UUID in its canonical lowercase textual form.

    The version nibble is fixed to `4` and the variant nibble (the `y` of the template)
    is restricted to `8`, `9`, `a` or `b` as required by RFC 4122.
    The digits come from the `random` module, so the result is not suitable
    where unpredictability matters.

    @returns    A `str` of the form `xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx`
    """
    variant_table = '89ab'  # The two most significant bits of the variant nibble must be "10"

    def _random_digit(mobj):
        table = _HEX_TABLE if mobj.group() == 'x' else variant_table
        return table[random.randint(0, len(table) - 1)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5305
5306
def make_dir(path, to_screen=None):
    """
    Make sure that the directory which is to contain `path` exists.

    @param path         Path of a file; only its parent directory is created
    @param to_screen    Optional callable that is given a message if the creation fails
    @returns            True if the directory exists or was created (or `path` has no
                        directory part), False if creating it raised an `OSError`
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            # exist_ok: the directory may be created by someone else after the check above
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # `to_screen` defaults to None, so only report when there is something to call
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5317
5318
def get_executable_path():
    """Return the absolute directory of the path reported by `update._get_variant_and_executable_path`"""
    # Imported here rather than at module level
    # (presumably to avoid a circular import - confirm)
    from .update import _get_variant_and_executable_path

    executable = _get_variant_and_executable_path()[1]
    return os.path.dirname(os.path.abspath(executable))
5323
5324
def load_plugins(name, suffix, namespace):
    """
    Load the plugin package `ytdlp_plugins/<name>` located next to the executable.

    Every attribute of the package whose name ends with `suffix` and is not
    already present in `namespace` is added to `namespace`.

    @returns    A dict of the newly added names and their objects.
                It is empty if the package's `__init__.py` does not exist.
    """
    loaded = {}
    with contextlib.suppress(FileNotFoundError):
        init_file = os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')
        spec = importlib.util.spec_from_file_location(name, init_file)
        module = importlib.util.module_from_spec(spec)
        # Register the module before executing it
        sys.modules[spec.name] = module
        spec.loader.exec_module(module)

        for attr in dir(module):
            if attr in namespace or not attr.endswith(suffix):
                continue
            loaded[attr] = namespace[attr] = getattr(module, attr)
    return loaded
5341
5342
def traverse_obj(
        obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    """
    Safely traverse nested `dict`s and `Sequence`s

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    "value"

    Each of the provided `paths` is tested and the first producing a valid result will be returned.
    The next path will also be tested if the path branched but no results could be found.
    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
    A value of None is treated as the absence of a value.

    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.

    The keys in the path can be one of:
        - `None`:           Return the current object.
        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
        - `slice`:          Branch out and return all values in `obj[key]`.
        - `Ellipsis`:       Branch out and return a list of all values.
        - `tuple`/`list`:   Branch out and return a list of all matching values.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch out and return values filtered by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Sequence`s, `key` is the index of the value.
        - `dict`            Transform the current object and return a matching dict.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

        `tuple`, `list`, and `dict` all support nested paths and branches.

    @params paths           Paths which to traverse by.
    @param default          Value to return if the paths do not match.
    @param expected_type    If a `type`, only accept final values of this type.
                            If any other callable, try to call the function on each result.
    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
    @param casesense        If `False`, consider string dictionary keys as case insensitive.

    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API

    @param is_user_input    Whether the keys are generated from user input.
                            If `True` strings get converted to `int`/`slice` if needed.
    @param traverse_string  Whether to traverse into objects as strings.
                            If `True`, any non-compatible object will first be
                            converted into a string and then traversed into.


    @returns                The result of the object traversal.
                            If successful, `get_all=True`, and the path branches at least once,
                            then a list of results is returned instead.
                            A list is always returned if the last path branches and no `default` is given.
    """
    # `str` and `bytes` are deliberately not treated as sequences
    is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
    casefold = lambda k: k.casefold() if isinstance(k, str) else k

    # `type_test` maps a result to itself (or a converted value), or to None to discard it
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    else:
        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))

    def apply_key(key, obj):
        # Generator yielding every value that results from applying the single `key` to `obj`
        if obj is None:
            return

        elif key is None:
            yield obj

        elif isinstance(key, (list, tuple)):
            # Each branch is applied to the same object as a path of its own
            for branch in key:
                _, result = apply_path(obj, branch)
                yield from result

        elif key is ...:
            if isinstance(obj, collections.abc.Mapping):
                yield from obj.values()
            elif is_sequence(obj):
                yield from obj
            elif isinstance(obj, re.Match):
                yield from obj.groups()
            elif traverse_string:
                yield from str(obj)

        elif callable(key):
            # Build (key, value) pairs for the filter function
            if is_sequence(obj):
                iter_obj = enumerate(obj)
            elif isinstance(obj, collections.abc.Mapping):
                iter_obj = obj.items()
            elif isinstance(obj, re.Match):
                # Index 0 is the whole match, followed by the groups
                iter_obj = enumerate((obj.group(), *obj.groups()))
            elif traverse_string:
                iter_obj = enumerate(str(obj))
            else:
                return
            yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))

        elif isinstance(key, dict):
            # Each value of the dict is a path that is traversed from the current object.
            # A None result is replaced by `default` if one was given, otherwise the key is dropped
            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
            yield {k: v if v is not None else default for k, v in iter_obj
                   if v is not None or default is not NO_DEFAULT}

        elif isinstance(obj, collections.abc.Mapping):
            # For case insensitive lookups `key` has already been casefolded by `apply_path`
            yield (obj.get(key) if casesense or (key in obj)
                   else next((v for k, v in obj.items() if casefold(k) == key), None))

        elif isinstance(obj, re.Match):
            if isinstance(key, int) or casesense:
                with contextlib.suppress(IndexError):
                    yield obj.group(key)
                    return

            if not isinstance(key, str):
                return

            # Case insensitive lookup among the named groups
            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)

        else:
            if is_user_input:
                # User supplied keys are strings: "a:b:c" becomes a slice, anything else an int (or None)
                key = (int_or_none(key) if ':' not in key
                       else slice(*map(int_or_none, key.split(':'))))

            if not isinstance(key, (int, slice)):
                return

            if not is_sequence(obj):
                if not traverse_string:
                    return
                obj = str(obj)

            with contextlib.suppress(IndexError):
                yield obj[key]

    def apply_path(start_obj, path):
        # Apply the keys of `path` one after another; the results are chained lazily.
        # Returns whether any key of the path could branch along with the iterable of results
        objs = (start_obj,)
        has_branched = False

        for key in variadic(path):
            if is_user_input and key == ':':
                key = ...

            if not casesense and isinstance(key, str):
                key = key.casefold()

            if key is ... or isinstance(key, (list, tuple)) or callable(key):
                has_branched = True

            key_func = functools.partial(apply_key, key)
            objs = itertools.chain.from_iterable(map(key_func, objs))

        return has_branched, objs

    def _traverse_obj(obj, path, use_list=True):
        has_branched, results = apply_path(obj, path)
        # Results that fail the type test, as well as None values, are discarded
        results = LazyList(x for x in map(type_test, results) if x is not None)

        if get_all and has_branched:
            # Without any result, the (empty) list is only returned if `use_list` is set
            return results.exhaust() if results or use_list else None

        return results[0] if results else None

    for index, path in enumerate(paths, 1):
        # An empty list is an acceptable result only for the last path and only if no `default` is given
        use_list = default is NO_DEFAULT and index == len(paths)
        result = _traverse_obj(obj, path, use_list)
        if result is not None:
            return result

    return None if default is NO_DEFAULT else default
5510
5511
def traverse_dict(dictn, keys, casesense=True):
    """
    Deprecated wrapper around `traverse_obj`.

    Issues a deprecation warning and then traverses `dictn` by `keys`,
    treating the keys as user input and allowing traversal into strings.
    """
    message = (
        f'"{__name__}.traverse_dict" is deprecated and may be removed '
        f'in a future version. Use "{__name__}.traverse_obj" instead')
    deprecation_warning(message)

    legacy_options = {
        'casesense': casesense,
        'is_user_input': True,
        'traverse_string': True,
    }
    return traverse_obj(dictn, keys, **legacy_options)
5516
5517
def get_first(obj, keys, **kwargs):
    """
    Branch over all values of `obj`, follow `keys` in each of them
    and return only the first result (`get_all=False`).

    Any other keyword arguments are passed on to `traverse_obj`.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
5520
5521
def time_seconds(**kwargs):
    """
    Return the timestamp of the current time, taken in the timezone
    whose UTC offset is `datetime.timedelta(**kwargs)`.

    NOTE(review): the timestamp of an aware datetime denotes an absolute instant,
    so the offset is not expected to influence the returned value - confirm what callers rely on.
    """
    utc_offset = datetime.timedelta(**kwargs)
    tz = datetime.timezone(utc_offset)
    now = datetime.datetime.now(tz)
    return now.timestamp()
5525
5526
# Create a JSON Web Signature (JWS) with the HS256 algorithm.
# The result is laid out like the JWS Compact Serialization: header.payload.signature
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
# NOTE(review): the segments use standard base64 including padding, not unpadded base64url - confirm this is intended
def jwt_encode_hs256(payload_data, key, headers={}):
    """
    @param payload_data     JSON serializable payload
    @param key              Secret used for the HMAC-SHA256 signature, as `str`
    @param headers          Additional header fields; they override the default `alg` and `typ`
    @returns                The token as `bytes`
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    header_data.update(headers or {})

    def _segment(data):
        return base64.b64encode(json.dumps(data).encode())

    signing_input = b'.'.join((_segment(header_data), _segment(payload_data)))
    digest = hmac.new(key.encode(), signing_input, hashlib.sha256).digest()
    return b'.'.join((signing_input, base64.b64encode(digest)))
5544
5545
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """
    Return the decoded JSON payload of the compact token `jwt` (a `str`).

    Neither the header nor the signature is inspected or verified.
    """
    _header_b64, payload_b64, _signature_b64 = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    padded_payload = f'{payload_b64}==='
    return json.loads(base64.urlsafe_b64decode(padded_payload))
5552
5553
# None when not on Windows; on Windows, False until `windows_enable_vt_mode` sets it to True
WINDOWS_VT_MODE = None if compat_os_name != 'nt' else False


@functools.cache
def supports_terminal_sequences(stream):
    """
    Whether terminal sequences may be written to `stream`. The result is cached per stream.

    This requires `WINDOWS_VT_MODE` to be set on Windows and the `TERM` environment
    variable to be set elsewhere. In addition, `stream.isatty()` must succeed and be true.
    """
    on_windows = compat_os_name == 'nt'
    if on_windows and not WINDOWS_VT_MODE:
        return False
    if not on_windows and not os.getenv('TERM'):
        return False

    try:
        is_tty = stream.isatty()
    except BaseException:
        return False
    return is_tty
5568
5569
def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
    """
    Set `WINDOWS_VT_MODE` and reset the cache of `supports_terminal_sequences`.

    Does nothing if the Windows version is older than 10.0.10586
    or if running the empty shell command fails.
    """
    global WINDOWS_VT_MODE

    if get_windows_version() < (10, 0, 10586):
        return

    try:
        # NOTE(review): presumably spawning a shell is what switches the console to VT mode - see the linked issue
        Popen.run('', shell=True)
    except Exception:
        return
    else:
        WINDOWS_VT_MODE = True
        # Results cached before the mode was enabled are outdated
        supports_terminal_sequences.cache_clear()
5581
5582
# ESC, "[", one or more characters other than "m", and the terminating "m"
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with everything matching `_terminal_sequences_re` removed"""
    return re.sub(_terminal_sequences_re, '', string)
5588
5589
def number_of_digits(number):
    """
    Return the length of `number` formatted with `%d`.

    Note that the minus sign of a negative number is part of that length.
    """
    as_text = '%d' % number
    return len(as_text)
5592
5593
def join_nonempty(*values, delim='-', from_dict=None):
    """
    Join the `str()` of all truthy `values` with `delim`.

    @param from_dict    If given, each value is instead a path that is
                        looked up in it with `traverse_obj`
    """
    if from_dict is not None:
        values = (traverse_obj(from_dict, variadic(path)) for path in values)
    return delim.join(str(value) for value in values if value)
5598
5599
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    This function is useful with video services that scale the provided thumbnails on demand

    @returns    `thumbnails` itself if no format has a width, otherwise a new list of thumbnail dicts
    """
    _keys = ('width', 'height')
    # Tuples compare element by element, so the width is the primary criterion
    all_dimensions = (tuple(fmt.get(key) or 0 for key in _keys) for fmt in formats)
    max_dimensions = max(all_dimensions, default=(0, 0))

    max_width = max_dimensions[0]
    if not max_width:
        return thumbnails

    scaled = []
    for thumbnail in thumbnails:
        scaled_url = re.sub(url_width_re, str(max_width), thumbnail['url'])
        scaled.append(merge_dicts(
            {'url': scaled_url},
            dict(zip(_keys, max_dimensions)), thumbnail))
    return scaled
5620
5621
def parse_http_range(range):
    """
    Parse value of "Range" or "Content-Range" HTTP header into tuple.

    @returns    (start, end, total); all three are None if the value is empty
                or not a byte range, `end` and `total` are None when absent
    """
    unknown = (None, None, None)
    if not range:
        return unknown

    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if crg is None:
        return unknown

    start, end, total = crg.groups()
    return int(start), int_or_none(end), int_or_none(total)
5630
5631
def read_stdin(what):
    """
    Announce via `write_string` that `what` is going to be read from STDIN
    (including the key combination which signals EOF) and return `sys.stdin`.

    Nothing is read here; that is left to the caller.
    """
    eof = 'Ctrl+D' if compat_os_name != 'nt' else 'Ctrl+Z'
    prompt = f'Reading {what} from STDIN - EOF ({eof}) to end:\n'
    write_string(prompt)
    return sys.stdin
5636
5637
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @param data     The leading bytes of the file
    @returns (encoding, bytes to skip)
             The encoding is None if it could not be determined.
             The bytes to skip are the length of the BOM, or 0 if there is none
    """

    # BOM marks are given priority over declarations
    bom_match = next(((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if bom_match is not None:
        return bom_match

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    stripped = data.replace(b'\0', b'')
    # `re.match` only matches at the start, so the declaration has to be the very first line
    mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if not mobj:
        return None, 0
    return mobj.group(1).decode(), 0
5654
5655
class Config:
    """
    A set of command-line arguments together with the configurations it refers to.

    The arguments are parsed with `parser`. Each of the resulting `config_locations`
    is loaded as a child `Config`, which may in turn refer to further locations.
    All children share one set of loaded paths so that no location is loaded twice.
    """
    own_args = None  # Arguments given to this config directly
    parsed_args = None  # `own_args`, once `load_configs` has parsed them
    filename = None  # File the arguments were read from, if any
    __initialized = False

    def __init__(self, parser, label=None):
        self.parser = parser
        self.label = label
        self._loaded_paths = set()
        self.configs = []

    def init(self, args=None, filename=None):
        """Set the arguments and load the configs they refer to. May only be called once"""
        assert not self.__initialized
        self.own_args = args
        self.filename = filename
        return self.load_configs()

    def load_configs(self):
        """
        Parse the own arguments and load every config location found in them.

        @returns    False if `filename` has been loaded already, True otherwise
        """
        base_dir = ''
        if self.filename:
            real_path = os.path.realpath(self.filename)
            base_dir = os.path.dirname(real_path)
            if real_path in self._loaded_paths:
                return False
            self._loaded_paths.add(real_path)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args

        for entry in opts.config_locations or []:
            if entry == '-':
                # "-" stands for STDIN, which is read at most once
                if entry not in self._loaded_paths:
                    self._loaded_paths.add(entry)
                    self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue

            # Relative locations are resolved against the directory of the current config file
            conf_path = os.path.join(base_dir, expand_path(entry))
            if os.path.isdir(conf_path):
                conf_path = os.path.join(conf_path, 'yt-dlp.conf')
            if not os.path.exists(conf_path):
                self.parser.error(f'config location {conf_path} does not exist')
            self.append_config(self.read_file(conf_path), conf_path)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')

        own_line = False
        if self.own_args is not None:
            own_line = f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}'

        # Every line of a child config is prefixed with "| "
        child_blocks = [f'\n{child}'.replace('\n', '\n| ')[1:] for child in self.configs]
        return join_nonempty(own_line, *child_blocks, delim='\n')

    @staticmethod
    def read_file(filename, default=[]):
        """
        Read the arguments from the file `filename`.

        @returns    The list of arguments, or `default` if the file cannot be opened
        @raises ValueError  If the contents cannot be decoded or split into arguments
        """
        try:
            optionf = open(filename, 'rb')
        except OSError:
            return default  # silently skip if file is not present

        try:
            enc, skip = determine_file_encoding(optionf.read(512))
            optionf.seek(skip, io.SEEK_SET)
        except OSError:
            enc = None  # silently skip read errors

        try:
            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
            text = optionf.read().decode(enc or preferredencoding())
            return shlex.split(text, comments=True)
        except Exception as err:
            raise ValueError(f'Unable to parse "{filename}": {err}')
        finally:
            optionf.close()

    @staticmethod
    def hide_login_info(opts):
        """Return a copy of the list `opts` where the values of private options are replaced with "PRIVATE\""""
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(map(re.escape, PRIVATE_OPTS))) + ')=.+$')

        # Values given as "--option=value"
        scrubbed = []
        for opt in opts:
            mobj = eqre.match(opt)
            scrubbed.append(mobj.group('key') + '=PRIVATE' if mobj else opt)

        # Values given as the argument following the option.
        # The list is changed while walking it, so a replaced value is never treated as an option itself
        for idx in range(len(scrubbed) - 1):
            if scrubbed[idx] in PRIVATE_OPTS:
                scrubbed[idx + 1] = 'PRIVATE'
        return scrubbed

    def append_config(self, *args, label=None):
        """Create a child config from `args` (see `init`) and keep it unless it was loaded before"""
        child = type(self)(self.parser, label)
        child._loaded_paths = self._loaded_paths
        if child.init(*args):
            self.configs.append(child)

    @property
    def all_args(self):
        """The arguments of all child configs, last one first, followed by the own arguments"""
        for child in reversed(self.configs):
            yield from child.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        return self.parser.parse_args(self.all_args)
5763
5764
class WebSocketsWrapper:
    """
    Wraps websockets module to use in non-async scopes

    Every instance owns a private event loop on which the coroutines of the
    connection are run to completion. The connection is closed by `__exit__`,
    which is also registered to run at interpreter exit.
    """
    pool = None  # The opened connection, set by `__enter__`

    def __init__(self, url, headers=None, connect=True):
        """
        @param url      URL to connect to
        @param headers  Extra headers passed to `websockets.connect`
        @param connect  Whether to open the connection right away
        """
        self.loop = asyncio.new_event_loop()
        # XXX: "loop" is deprecated
        self.conn = websockets.connect(
            url, extra_headers=headers, ping_interval=None,
            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
        if connect:
            self.__enter__()
        atexit.register(self.__exit__, None, None, None)

    def __enter__(self):
        if not self.pool:
            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
        return self

    def send(self, *args):
        self.run_with_loop(self.pool.send(*args), self.loop)

    def recv(self, *args):
        return self.run_with_loop(self.pool.recv(*args), self.loop)

    def __exit__(self, type, value, traceback):
        # The atexit hook may call this again after the connection was already closed
        if self.loop.is_closed():
            return None
        try:
            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
        finally:
            # Cancelling needs a usable loop, so the loop must be closed only afterwards
            try:
                self._cancel_all_tasks(self.loop)
            finally:
                self.loop.close()

    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
    # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
    @staticmethod
    def run_with_loop(main, loop):
        """
        Run the coroutine `main` on `loop` until it completes and return its result.

        @raises ValueError  If `main` is not a coroutine
        """
        if not asyncio.iscoroutine(main):
            raise ValueError(f'a coroutine was expected, got {main!r}')

        try:
            return loop.run_until_complete(main)
        finally:
            loop.run_until_complete(loop.shutdown_asyncgens())
            if hasattr(loop, 'shutdown_default_executor'):
                loop.run_until_complete(loop.shutdown_default_executor())

    @staticmethod
    def _cancel_all_tasks(loop):
        """Cancel all tasks of `loop`, wait for them to finish and report their unhandled exceptions"""
        to_cancel = asyncio.all_tasks(loop)

        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()

        # The "loop" parameter of gather was removed in python 3.10.
        # It is not needed here either way: the loop is taken from the tasks themselves
        loop.run_until_complete(
            asyncio.gather(*to_cancel, return_exceptions=True))

        for task in to_cancel:
            if task.cancelled():
                continue
            if task.exception() is not None:
                loop.call_exception_handler({
                    'message': 'unhandled exception during asyncio.run() shutdown',
                    'exception': task.exception(),
                    'task': task,
                })
5834
5835
def merge_headers(*dicts):
    """
    Merge dicts of http headers case insensitively, prioritizing the latter ones

    The names in the result are normalized with `str.title`.
    """
    merged = {}
    for headers in dicts:
        for name, value in dict.items(headers):
            merged[name.title()] = value
    return merged
5839
5840
def cached_method(f):
    """
    Cache a method

    The results are stored on the instance, in `_cached_method__cache`, per method name.
    They are keyed by the arguments after binding them to the signature and applying
    the defaults, so the way an argument is passed does not matter.
    """
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        call = signature.bind(self, *args, **kwargs)
        call.apply_defaults()
        # The first bound argument is the instance itself
        _, *call_args = call.arguments.values()
        key = tuple(call_args)

        instance_cache = vars(self).setdefault('_cached_method__cache', {})
        results = instance_cache.setdefault(f.__name__, {})
        if key not in results:
            results[key] = f(self, *args, **kwargs)
        return results[key]
    return wrapper
5856
5857
class classproperty:
    """
    property access for class methods with optional caching

    Can be used bare (`@classproperty`) or with arguments (`@classproperty(cache=True)`).
    With `cache=True` the function is evaluated only once for each class it is accessed on.
    """
    def __new__(cls, func=None, *args, **kwargs):
        if func:
            return super().__new__(cls)
        # Used with arguments only: hand out a decorator which is still waiting for the function
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        if self._cache is None:
            return self.func(cls)
        if cls not in self._cache:
            self._cache[cls] = self.func(cls)
        return self._cache[cls]
5876
5877
class Namespace(types.SimpleNamespace):
    """Simple namespace whose iteration yields the attribute values

    NB: Nothing in this class prevents attributes from being (re)assigned
    """

    def __iter__(self):
        return iter(vars(self).values())

    @property
    def items_(self):
        """View of the (attribute name, value) pairs"""
        return vars(self).items()
5887
5888
# File extensions grouped by the kind of media they denote.
# `video` and `audio` initially hold only the less common extensions;
# the `common_*` tuples are appended to them right after construction.
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# Tuples cannot grow in place, so `+=` rebinds each attribute to a new, longer tuple
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

# Flat tuple of every video, audio and manifest extension (in that order)
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5903
5904
class RetryManager:
    """Usage:
        for retry in RetryManager(...):
            try:
                ...
            except SomeException as err:
                retry.error = err
                continue

    Iteration stops as soon as an attempt finishes without setting
    `retry.error`, or once more than `retries` retries would be needed.
    After every failed attempt the error callback is invoked as
    callback(error, attempt, retries, **kwargs).
    """
    attempt = 0
    _error = None

    def __init__(self, _retries, _error_callback, **kwargs):
        self.error_callback = functools.partial(_error_callback, **kwargs)
        self.retries = _retries if _retries else 0

    def _should_retry(self):
        # NO_DEFAULT marks an attempt that completed without reporting an error
        if self._error is NO_DEFAULT:
            return False
        return self.attempt <= self.retries

    @property
    def error(self):
        """The error reported for the current attempt, or None if there is none"""
        return None if self._error is NO_DEFAULT else self._error

    @error.setter
    def error(self, value):
        self._error = value

    def __iter__(self):
        while self._should_retry():
            # Assume success until the loop body reports otherwise
            self.error = NO_DEFAULT
            self.attempt += 1
            yield self
            failure = self.error
            if failure:
                self.error_callback(failure, self.attempt, self.retries)

    @staticmethod
    def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
        """Utility function for reporting retries

        @param e            The exception that caused the retry
        @param count        Number of the attempt that just failed
        @param retries      Maximum number of retries allowed
        @param sleep_func   Delay before the next attempt; either a value or
                            a callable invoked as sleep_func(n=count - 1)
        @param info         Callable used for informational messages
        @param warn         Callable used for warnings
        @param error        Callable used once retries are exhausted.
                            If not given, the exception is raised instead
        @param suffix       Optional text appended after "Retrying"
        """
        if count > retries:
            if not error:
                raise e
            if count > 1:
                return error(f'{e}. Giving up after {count - 1} retries')
            return error(str(e))

        if not count:
            return warn(e)
        if isinstance(e, ExtractorError):
            e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
        warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')

        if callable(sleep_func):
            delay = float_or_none(sleep_func(n=count - 1))
        else:
            delay = sleep_func
        if delay:
            info(f'Sleeping {delay:.2f} seconds ...')
            time.sleep(delay)
5959
5960
def make_archive_id(ie, video_id):
    """Build the archive id "<lowercased ie key> <video id>"

    @param ie   Either the key as a string, or an object providing ie_key()
    """
    if not isinstance(ie, str):
        ie = ie.ie_key()
    return f'{ie.lower()} {video_id}'
5964
5965
def truncate_string(s, left, right=0):
    """Shorten a string to at most `left + right` characters using an ellipsis

    @param s        The string to shorten. None is returned unchanged
    @param left     Number of characters given to the head, including the
                    three-character "..." marker; must be greater than 3
    @param right    Number of trailing characters to keep; must not be negative
    @returns        `s` itself if it already fits, otherwise
                    the first `left - 3` characters + "..." + the last `right` characters
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    # s[-0:] is the whole string, not an empty one, so right == 0 needs special handling
    tail = s[-right:] if right else ''
    return f'{s[:left - 3]}...{tail}'
5971
5972
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Build an ordered, duplicate-free selection from a list of option strings

    @param options      Iterable of option strings. A leading "-" removes
                        the option from the selection instead of adding it
    @param alias_dict   Maps alias names to lists of options.
                        Must contain "all", which lists every valid value
    @param use_regex    Treat (non-alias) options as case-insensitive regexes
                        that must fully match a value from alias_dict["all"]
    @param start        Initial selection to build upon
    @raises ValueError  When an option is neither an alias nor a valid value
                        (only if use_regex is not set)
    """
    assert 'all' in alias_dict, '"all" alias is required'
    selected = list(start or [])
    for option in options:
        discard = option.startswith('-')
        name = option[1:] if discard else option

        if name in alias_dict:
            members = alias_dict[name]
            if discard:
                # Discarding an alias means flipping the sign of each of its members
                members = [
                    member[1:] if member.startswith('-') else f'-{member}'
                    for member in members]
            # NB: Do not allow regex in aliases for performance
            selected = orderedSet_from_options(members, alias_dict, start=selected)
            continue

        if use_regex:
            matches = filter(re.compile(name, re.I).fullmatch, alias_dict['all'])
        elif name in alias_dict['all']:
            matches = [name]
        else:
            raise ValueError(name)

        if not discard:
            selected.extend(matches)
            continue
        for item in matches:
            while item in selected:
                selected.remove(item)

    return orderedSet(selected)
6001
6002
class FormatSorter:
    """Compute sort keys for format dicts according to a list of sort fields

    The effective field order is assembled in evaluate_params() from the
    forced fields, the priority fields (unless "format_sort_force" is set),
    the user's "format_sort" param, the extractor's preference and `default`.
    calculate_preference() then yields one tuple per field for a format.

    NB: `settings` is a class-level dict which instances modify in place
    (computed defaults, limits and arbitrary fields are all stored in it)
    """

    # Syntax of a single sort item: [+]field[(:|~)limit]
    #   "+" reverses the order, ":" gives an upper limit, "~" prefers the value closest to the limit
    regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

    default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
               'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
               'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases
    ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                    'height', 'width', 'proto', 'vext', 'abr', 'aext',
                    'fps', 'fs_approx', 'source', 'id')

    # Per-field configuration. Keys that are missing get their defaults from _get_field_setting()
    #   type:        'field' (default), 'ordered', 'boolean', 'extractor', 'multiple', 'combined' or 'alias'
    #   field:       Key(s) of the format dict to read; for an alias, the sort field it refers to
    #   convert:     How limits given as text are converted (see _resolve_field_value)
    #   order:       For 'ordered' fields, the values from most to least preferred
    #   order_free:  Alternative to `order`, used when "prefer_free_formats" is set
    #   regex:       Whether the entries of `order` are regexes
    #   forced/priority: Whether the field is always put in front of the user's fields
    #   visible:     Whether the field is shown in the verbose output
    settings = {
        'vcodec': {'type': 'ordered', 'regex': True,
                   'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
        'acodec': {'type': 'ordered', 'regex': True,
                   'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
        'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
                'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
        'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
                  'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
        'vext': {'type': 'ordered', 'field': 'video_ext',
                 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
                 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
        'aext': {'type': 'ordered', 'field': 'audio_ext',
                 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
                 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')},
        'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
        'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                       'field': ('vcodec', 'acodec'),
                       'function': lambda it: int(any(v != 'none' for v in it))},
        'ie_pref': {'priority': True, 'type': 'extractor'},
        'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
        'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
        'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
        'quality': {'convert': 'float', 'default': -1},
        'filesize': {'convert': 'bytes'},
        'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
        'id': {'convert': 'string', 'field': 'format_id'},
        'height': {'convert': 'float_none'},
        'width': {'convert': 'float_none'},
        'fps': {'convert': 'float_none'},
        'channels': {'convert': 'float_none', 'field': 'audio_channels'},
        'tbr': {'convert': 'float_none'},
        'vbr': {'convert': 'float_none'},
        'abr': {'convert': 'float_none'},
        'asr': {'convert': 'float_none'},
        'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},

        'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
        'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
        'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
        'ext': {'type': 'combined', 'field': ('vext', 'aext')},
        'res': {'type': 'multiple', 'field': ('height', 'width'),
                'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},

        # Actual field names
        'format_id': {'type': 'alias', 'field': 'id'},
        'preference': {'type': 'alias', 'field': 'ie_pref'},
        'language_preference': {'type': 'alias', 'field': 'lang'},
        'source_preference': {'type': 'alias', 'field': 'source'},
        'protocol': {'type': 'alias', 'field': 'proto'},
        'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
        'audio_channels': {'type': 'alias', 'field': 'channels'},

        # Deprecated
        'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
        'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
        'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
        'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
        'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
        'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
        'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
        'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
        'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
        'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
        'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
        'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
        'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
        'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
        'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
        'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
        'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
        'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
        'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
        'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
    }

    def __init__(self, ydl, field_preference):
        """
        @param ydl                  Object providing `params`, `write_debug` and `deprecated_feature`
        @param field_preference     Sort items requested by the extractor
        """
        self.ydl = ydl
        self._order = []
        self.evaluate_params(self.ydl.params, field_preference)
        if ydl.params.get('verbose'):
            self.print_verbose_info(self.ydl.write_debug)

    def _get_field_setting(self, field, key):
        """Look up a setting of a sort field, computing and storing its default if it is not set

        Unknown fields are registered as arbitrary fields (with a deprecation
        warning), except when only 'forced' or 'priority' is being queried
        """
        if field not in self.settings:
            if key in ('forced', 'priority'):
                return False
            self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
                                        'deprecated and may be removed in a future version')
            self.settings[field] = {}
        propObj = self.settings[field]
        if key not in propObj:
            type = propObj.get('type')
            if key == 'field':
                default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
            elif key == 'convert':
                default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
            else:
                default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
            # The default is stored so that later lookups find it directly
            propObj[key] = default
        return propObj[key]

    def _resolve_field_value(self, field, value, convertNone=False):
        """Convert a textual value (eg: a limit) of a field according to the field's 'convert' setting

        @param convertNone  Whether a value of None should also be converted
                            rather than returned as is
        """
        if value is None:
            if not convertNone:
                return None
        else:
            value = value.lower()
        conversion = self._get_field_setting(field, 'convert')
        if conversion == 'ignore':
            return None
        if conversion == 'string':
            return value
        elif conversion == 'float_none':
            return float_or_none(value)
        elif conversion == 'bytes':
            return parse_bytes(value)
        elif conversion == 'order':
            # The result is the distance from the end of the order list, so earlier entries rank higher
            order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
            use_regex = self._get_field_setting(field, 'regex')
            list_length = len(order_list)
            # Values that are not in the list are ranked at the position of the '' entry
            empty_pos = order_list.index('') if '' in order_list else list_length + 1
            if use_regex and value is not None:
                for i, regex in enumerate(order_list):
                    if regex and re.match(regex, value):
                        return list_length - i
                return list_length - empty_pos  # not in list
            else:  # not regex or  value = None
                return list_length - (order_list.index(value) if value in order_list else empty_pos)
        else:
            if value.isnumeric():
                return float(value)
            else:
                # A non-numeric value turns the field into a string field from here on
                self.settings[field]['convert'] = 'string'
                return value

    def evaluate_params(self, params, sort_extractor):
        """Build the effective field order (self._order) and store each field's reverse/closest/limit

        @param params           Dict of parameters; "prefer_free_formats",
                                "format_sort" and "format_sort_force" are read
        @param sort_extractor   Sort items requested by the extractor
        @raises ExtractorError  When a sort item does not match `regex`
        """
        self._use_free_order = params.get('prefer_free_formats', False)
        self._sort_user = params.get('format_sort', [])
        self._sort_extractor = sort_extractor

        def add_item(field, reverse, closest, limit_text):
            field = field.lower()
            # Only the first occurrence of a field counts
            if field in self._order:
                return
            self._order.append(field)
            limit = self._resolve_field_value(field, limit_text)
            data = {
                'reverse': reverse,
                'closest': False if limit is None else closest,
                'limit_text': limit_text,
                'limit': limit}
            if field in self.settings:
                self.settings[field].update(data)
            else:
                self.settings[field] = data

        sort_list = (
            tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
            + (tuple() if params.get('format_sort_force', False)
                else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
            + tuple(self._sort_user) + tuple(sort_extractor) + self.default)

        for item in sort_list:
            match = re.match(self.regex, item)
            if match is None:
                raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
            field = match.group('field')
            if field is None:
                continue
            if self._get_field_setting(field, 'type') == 'alias':
                alias, field = field, self._get_field_setting(field, 'field')
                if self._get_field_setting(alias, 'deprecated'):
                    self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
                                                f'be removed in a future version. Please use {field} instead')
            reverse = match.group('reverse') is not None
            closest = match.group('separator') == '~'
            limit_text = match.group('limit')

            has_limit = limit_text is not None
            has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
            has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')

            # A combined field expands to its sub-fields. Unless 'same_limit' is set,
            # its limit is a ":"-separated list with one limit per sub-field
            fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
            limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
            limit_count = len(limits)
            for (i, f) in enumerate(fields):
                add_item(f, reverse, closest,
                         limits[i] if i < limit_count
                         else limits[0] if has_limit and not has_multiple_limits
                         else None)

    def print_verbose_info(self, write_debug):
        """Report the user's, the extractor's and the effective sort order through `write_debug`"""
        if self._sort_user:
            write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
        if self._sort_extractor:
            write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
        write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
            '+' if self._get_field_setting(field, 'reverse') else '', field,
            '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
                          self._get_field_setting(field, 'limit_text'),
                          self._get_field_setting(field, 'limit'))
            if self._get_field_setting(field, 'limit_text') is not None else '')
            for field in self._order if self._get_field_setting(field, 'visible')]))

    def _calculate_field_preference_from_value(self, format, field, type, value):
        """Turn the value of a field into a tuple that can be compared against those of other formats"""
        reverse = self._get_field_setting(field, 'reverse')
        closest = self._get_field_setting(field, 'closest')
        limit = self._get_field_setting(field, 'limit')

        if type == 'extractor':
            maximum = self._get_field_setting(field, 'max')
            if value is None or (maximum is not None and value >= maximum):
                value = -1
        elif type == 'boolean':
            in_list = self._get_field_setting(field, 'in_list')
            not_in_list = self._get_field_setting(field, 'not_in_list')
            value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
        elif type == 'ordered':
            value = self._resolve_field_value(field, value, True)

        # try to convert to number
        val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
        is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
        if is_num:
            value = val_num

        # The first element separates the classes of values:
        # missing (-10) < numeric but beyond the limit (-1) < numeric (0) < string (1)
        return ((-10, 0) if value is None
                else (1, value, 0) if not is_num  # if a field has mixed strings and numbers, strings are sorted higher
                else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
                else (0, value, 0) if not reverse and (limit is None or value <= limit)
                else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
                else (-1, value, 0))

    def _calculate_field_preference(self, format, field):
        """Read the value(s) of a sort field from the format dict and compute its preference tuple"""
        type = self._get_field_setting(field, 'type')  # extractor, boolean, ordered, field, multiple
        get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
        if type == 'multiple':
            type = 'field'  # Only 'field' is allowed in multiple for now
            actual_fields = self._get_field_setting(field, 'field')

            value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
        else:
            value = get_value(field)
        return self._calculate_field_preference_from_value(format, field, type, value)

    def calculate_preference(self, format):
        """Compute the sort key of a format

        NB: The format dict is modified in place; missing "protocol", "ext",
        "video_ext", "audio_ext" and bitrate fields are filled in

        @returns    Tuple with one preference tuple per field of the sort order
        """
        # Determine missing protocol
        if not format.get('protocol'):
            format['protocol'] = determine_protocol(format)

        # Determine missing ext
        if not format.get('ext') and 'url' in format:
            format['ext'] = determine_ext(format['url'])
        if format.get('vcodec') == 'none':
            format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
            format['video_ext'] = 'none'
        else:
            format['video_ext'] = format['ext']
            format['audio_ext'] = 'none'
        # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'):  # Not supported?
        #    format['preference'] = -1000

        # Determine missing bitrates
        if format.get('tbr') is None:
            if format.get('vbr') is not None and format.get('abr') is not None:
                format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
        else:
            # NB: A bitrate key may be present with the value None, in which case the
            # default of dict.get is not used. So `or 0` is needed to avoid "tbr - None"
            if format.get('vcodec') != 'none' and format.get('vbr') is None:
                format['vbr'] = format.get('tbr') - (format.get('abr') or 0)
            if format.get('acodec') != 'none' and format.get('abr') is None:
                format['abr'] = format.get('tbr') - (format.get('vbr') or 0)

        return tuple(self._calculate_field_preference(format, field) for field in self._order)
6287
6288
# Deprecated
# Flags holding the truthiness of the corresponding names imported from .dependencies
has_certifi = bool(certifi)
has_websockets = bool(websockets)