1 import asyncio
2 import atexit
3 import base64
4 import binascii
5 import calendar
6 import codecs
7 import collections
8 import collections.abc
9 import contextlib
10 import datetime
11 import email.header
12 import email.utils
13 import errno
14 import gzip
15 import hashlib
16 import hmac
17 import html.entities
18 import html.parser
19 import http.client
20 import http.cookiejar
21 import importlib.util
22 import inspect
23 import io
24 import itertools
25 import json
26 import locale
27 import math
28 import mimetypes
29 import operator
30 import os
31 import platform
32 import random
33 import re
34 import shlex
35 import socket
36 import ssl
37 import struct
38 import subprocess
39 import sys
40 import tempfile
41 import time
42 import traceback
43 import types
44 import unicodedata
45 import urllib.error
46 import urllib.parse
47 import urllib.request
48 import xml.etree.ElementTree
49 import zlib
50
51 from .compat import functools # isort: split
52 from .compat import (
53 compat_etree_fromstring,
54 compat_expanduser,
55 compat_HTMLParseError,
56 compat_os_name,
57 compat_shlex_quote,
58 )
59 from .dependencies import brotli, certifi, websockets, xattr
60 from .socks import ProxyType, sockssocket
61
62
63 def register_socks_protocols():
64 # "Register" SOCKS protocols
65 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
66 # URLs with protocols not in urlparse.uses_netloc are not handled correctly
67 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
68 if scheme not in urllib.parse.uses_netloc:
69 urllib.parse.uses_netloc.append(scheme)
70
71
72 # This is not clearly defined otherwise
73 compiled_regex_type = type(re.compile(''))
74
75
76 def random_user_agent():
77 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
78 _CHROME_VERSIONS = (
79 '90.0.4430.212',
80 '90.0.4430.24',
81 '90.0.4430.70',
82 '90.0.4430.72',
83 '90.0.4430.85',
84 '90.0.4430.93',
85 '91.0.4472.101',
86 '91.0.4472.106',
87 '91.0.4472.114',
88 '91.0.4472.124',
89 '91.0.4472.164',
90 '91.0.4472.19',
91 '91.0.4472.77',
92 '92.0.4515.107',
93 '92.0.4515.115',
94 '92.0.4515.131',
95 '92.0.4515.159',
96 '92.0.4515.43',
97 '93.0.4556.0',
98 '93.0.4577.15',
99 '93.0.4577.63',
100 '93.0.4577.82',
101 '94.0.4606.41',
102 '94.0.4606.54',
103 '94.0.4606.61',
104 '94.0.4606.71',
105 '94.0.4606.81',
106 '94.0.4606.85',
107 '95.0.4638.17',
108 '95.0.4638.50',
109 '95.0.4638.54',
110 '95.0.4638.69',
111 '95.0.4638.74',
112 '96.0.4664.18',
113 '96.0.4664.45',
114 '96.0.4664.55',
115 '96.0.4664.93',
116 '97.0.4692.20',
117 )
118 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
119
120
121 SUPPORTED_ENCODINGS = [
122 'gzip', 'deflate'
123 ]
124 if brotli:
125 SUPPORTED_ENCODINGS.append('br')
126
127 std_headers = {
128 'User-Agent': random_user_agent(),
129 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
130 'Accept-Language': 'en-us,en;q=0.5',
131 'Sec-Fetch-Mode': 'navigate',
132 }
133
134
135 USER_AGENTS = {
136 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
137 }
138
139
140 NO_DEFAULT = object()
141 IDENTITY = lambda x: x
142
143 ENGLISH_MONTH_NAMES = [
144 'January', 'February', 'March', 'April', 'May', 'June',
145 'July', 'August', 'September', 'October', 'November', 'December']
146
147 MONTH_NAMES = {
148 'en': ENGLISH_MONTH_NAMES,
149 'fr': [
150 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
151 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
152 # these follow the genitive grammatical case (dopełniacz)
153 # some websites might be using nominative, which will require another month list
154 # https://en.wikibooks.org/wiki/Polish/Noun_cases
155 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
156 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
157 }
158
159 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
160 TIMEZONE_NAMES = {
161 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
162 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
163 'EST': -5, 'EDT': -4, # Eastern
164 'CST': -6, 'CDT': -5, # Central
165 'MST': -7, 'MDT': -6, # Mountain
166 'PST': -8, 'PDT': -7 # Pacific
167 }
168
169 # needed for sanitizing filenames in restricted mode
170 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
171 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
172 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
173
174 DATE_FORMATS = (
175 '%d %B %Y',
176 '%d %b %Y',
177 '%B %d %Y',
178 '%B %dst %Y',
179 '%B %dnd %Y',
180 '%B %drd %Y',
181 '%B %dth %Y',
182 '%b %d %Y',
183 '%b %dst %Y',
184 '%b %dnd %Y',
185 '%b %drd %Y',
186 '%b %dth %Y',
187 '%b %dst %Y %I:%M',
188 '%b %dnd %Y %I:%M',
189 '%b %drd %Y %I:%M',
190 '%b %dth %Y %I:%M',
191 '%Y %m %d',
192 '%Y-%m-%d',
193 '%Y.%m.%d.',
194 '%Y/%m/%d',
195 '%Y/%m/%d %H:%M',
196 '%Y/%m/%d %H:%M:%S',
197 '%Y%m%d%H%M',
198 '%Y%m%d%H%M%S',
199 '%Y%m%d',
200 '%Y-%m-%d %H:%M',
201 '%Y-%m-%d %H:%M:%S',
202 '%Y-%m-%d %H:%M:%S.%f',
203 '%Y-%m-%d %H:%M:%S:%f',
204 '%d.%m.%Y %H:%M',
205 '%d.%m.%Y %H.%M',
206 '%Y-%m-%dT%H:%M:%SZ',
207 '%Y-%m-%dT%H:%M:%S.%fZ',
208 '%Y-%m-%dT%H:%M:%S.%f0Z',
209 '%Y-%m-%dT%H:%M:%S',
210 '%Y-%m-%dT%H:%M:%S.%f',
211 '%Y-%m-%dT%H:%M',
212 '%b %d %Y at %H:%M',
213 '%b %d %Y at %H:%M:%S',
214 '%B %d %Y at %H:%M',
215 '%B %d %Y at %H:%M:%S',
216 '%H:%M %d-%b-%Y',
217 )
218
219 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
220 DATE_FORMATS_DAY_FIRST.extend([
221 '%d-%m-%Y',
222 '%d.%m.%Y',
223 '%d.%m.%y',
224 '%d/%m/%Y',
225 '%d/%m/%y',
226 '%d/%m/%Y %H:%M:%S',
227 '%d-%m-%Y %H:%M',
228 ])
229
230 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
231 DATE_FORMATS_MONTH_FIRST.extend([
232 '%m-%d-%Y',
233 '%m.%d.%Y',
234 '%m/%d/%Y',
235 '%m/%d/%y',
236 '%m/%d/%Y %H:%M:%S',
237 ])
238
239 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
240 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
241
242 NUMBER_RE = r'\d+(?:\.\d+)?'
243
244
245 @functools.cache
246 def preferredencoding():
247 """Get preferred encoding.
248
249 Returns the best encoding scheme for the system, based on
250 locale.getpreferredencoding() and some further tweaks.
251 """
252 try:
253 pref = locale.getpreferredencoding()
254 'TEST'.encode(pref)
255 except Exception:
256 pref = 'UTF-8'
257
258 return pref
259
260
261 def write_json_file(obj, fn):
262 """ Encode obj as JSON and write it to fn, atomically if possible """
263
264 tf = tempfile.NamedTemporaryFile(
265 prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
266 suffix='.tmp', delete=False, mode='w', encoding='utf-8')
267
268 try:
269 with tf:
270 json.dump(obj, tf, ensure_ascii=False)
271 if sys.platform == 'win32':
272 # Need to remove existing file on Windows, else os.rename raises
273 # WindowsError or FileExistsError.
274 with contextlib.suppress(OSError):
275 os.unlink(fn)
276 with contextlib.suppress(OSError):
277 mask = os.umask(0)
278 os.umask(mask)
279 os.chmod(tf.name, 0o666 & ~mask)
280 os.rename(tf.name, fn)
281 except Exception:
282 with contextlib.suppress(OSError):
283 os.remove(tf.name)
284 raise
285
286
287 def find_xpath_attr(node, xpath, key, val=None):
288 """ Find the xpath xpath[@key=val] """
289 assert re.match(r'^[a-zA-Z_-]+$', key)
290 expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
291 return node.find(expr)
292
293 # On python2.6 the xml.etree.ElementTree.Element methods don't support
294 # the namespace parameter
295
296
297 def xpath_with_ns(path, ns_map):
298 components = [c.split(':') for c in path.split('/')]
299 replaced = []
300 for c in components:
301 if len(c) == 1:
302 replaced.append(c[0])
303 else:
304 ns, tag = c
305 replaced.append('{%s}%s' % (ns_map[ns], tag))
306 return '/'.join(replaced)
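
# An illustrative sketch (the namespace URI is a hypothetical placeholder):
#
#   >>> xpath_with_ns('media:group/media:title', {'media': 'http://example.com/ns'})
#   '{http://example.com/ns}group/{http://example.com/ns}title'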
307
308
309 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
310 def _find_xpath(xpath):
311 return node.find(xpath)
312
313 if isinstance(xpath, str):
314 n = _find_xpath(xpath)
315 else:
316 for xp in xpath:
317 n = _find_xpath(xp)
318 if n is not None:
319 break
320
321 if n is None:
322 if default is not NO_DEFAULT:
323 return default
324 elif fatal:
325 name = xpath if name is None else name
326 raise ExtractorError('Could not find XML element %s' % name)
327 else:
328 return None
329 return n
330
331
332 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
333 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
334 if n is None or n == default:
335 return n
336 if n.text is None:
337 if default is not NO_DEFAULT:
338 return default
339 elif fatal:
340 name = xpath if name is None else name
341 raise ExtractorError('Could not find XML element\'s text %s' % name)
342 else:
343 return None
344 return n.text
345
346
347 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
348 n = find_xpath_attr(node, xpath, key)
349 if n is None:
350 if default is not NO_DEFAULT:
351 return default
352 elif fatal:
353 name = f'{xpath}[@{key}]' if name is None else name
354 raise ExtractorError('Could not find XML attribute %s' % name)
355 else:
356 return None
357 return n.attrib[key]
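
# A minimal sketch of the xpath helpers above (the document is hypothetical):
#
#   >>> doc = xml.etree.ElementTree.fromstring('<root><a x="1">t</a></root>')
#   >>> xpath_text(doc, './a')
#   't'
#   >>> xpath_attr(doc, './a', 'x')
#   '1'
#   >>> xpath_text(doc, './b', default=None) is None
#   True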
358
359
360 def get_element_by_id(id, html, **kwargs):
361 """Return the content of the tag with the specified ID in the passed HTML document"""
362 return get_element_by_attribute('id', id, html, **kwargs)
363
364
365 def get_element_html_by_id(id, html, **kwargs):
366 """Return the html of the tag with the specified ID in the passed HTML document"""
367 return get_element_html_by_attribute('id', id, html, **kwargs)
368
369
370 def get_element_by_class(class_name, html):
371 """Return the content of the first tag with the specified class in the passed HTML document"""
372 retval = get_elements_by_class(class_name, html)
373 return retval[0] if retval else None
374
375
376 def get_element_html_by_class(class_name, html):
377 """Return the html of the first tag with the specified class in the passed HTML document"""
378 retval = get_elements_html_by_class(class_name, html)
379 return retval[0] if retval else None
380
381
382 def get_element_by_attribute(attribute, value, html, **kwargs):
383 retval = get_elements_by_attribute(attribute, value, html, **kwargs)
384 return retval[0] if retval else None
385
386
387 def get_element_html_by_attribute(attribute, value, html, **kwargs):
388 retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
389 return retval[0] if retval else None
390
391
392 def get_elements_by_class(class_name, html, **kwargs):
393 """Return the content of all tags with the specified class in the passed HTML document as a list"""
394 return get_elements_by_attribute(
395 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
396 html, escape_value=False)
397
398
399 def get_elements_html_by_class(class_name, html):
400 """Return the html of all tags with the specified class in the passed HTML document as a list"""
401 return get_elements_html_by_attribute(
402 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
403 html, escape_value=False)
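
# An illustrative sketch of the class helpers (the HTML snippet is hypothetical):
#
#   >>> get_elements_by_class('video', '<div class="video big">A</div><span class="video">B</span>')
#   ['A', 'B']
#   >>> get_elements_html_by_class('video', '<div class="video">A</div>')
#   ['<div class="video">A</div>']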
404
405
406 def get_elements_by_attribute(*args, **kwargs):
407 """Return the content of the tag with the specified attribute in the passed HTML document"""
408 return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
409
410
411 def get_elements_html_by_attribute(*args, **kwargs):
412 """Return the html of the tag with the specified attribute in the passed HTML document"""
413 return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
414
415
416 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
417 """
418 Yield the text (content) and the html (whole) of each tag with the specified
419 attribute in the passed HTML document
420 """
421 if not value:
422 return
423
424 quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
425
426 value = re.escape(value) if escape_value else value
427
428 partial_element_re = rf'''(?x)
429 <(?P<tag>{tag})
430 (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
431 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
432 '''
433
434 for m in re.finditer(partial_element_re, html):
435 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
436
437 yield (
438 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
439 whole
440 )
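
# An illustrative sketch (the HTML snippet is hypothetical); note this is a
# generator, hence the list() call:
#
#   >>> list(get_elements_text_and_html_by_attribute('data-id', '1', '<p data-id="1">x</p>'))
#   [('x', '<p data-id="1">x</p>')]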
441
442
443 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
444 """
445 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
446 closing tag for the first opening tag it has encountered, and can be used
447 as a context manager
448 """
449
450 class HTMLBreakOnClosingTagException(Exception):
451 pass
452
453 def __init__(self):
454 self.tagstack = collections.deque()
455 html.parser.HTMLParser.__init__(self)
456
457 def __enter__(self):
458 return self
459
460 def __exit__(self, *_):
461 self.close()
462
463 def close(self):
464 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
465 # so data remains buffered; we no longer have any interest in it, thus
466 # override this method to discard it
467 pass
468
469 def handle_starttag(self, tag, _):
470 self.tagstack.append(tag)
471
472 def handle_endtag(self, tag):
473 if not self.tagstack:
474 raise compat_HTMLParseError('no tags in the stack')
475 while self.tagstack:
476 inner_tag = self.tagstack.pop()
477 if inner_tag == tag:
478 break
479 else:
480 raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
481 if not self.tagstack:
482 raise self.HTMLBreakOnClosingTagException()
483
484
485 # XXX: This should be far less strict
486 def get_element_text_and_html_by_tag(tag, html):
487 """
488 For the first element with the specified tag in the passed HTML document
489 return its content (text) and the whole element (html)
490 """
491 def find_or_raise(haystack, needle, exc):
492 try:
493 return haystack.index(needle)
494 except ValueError:
495 raise exc
496 closing_tag = f'</{tag}>'
497 whole_start = find_or_raise(
498 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
499 content_start = find_or_raise(
500 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
501 content_start += whole_start + 1
502 with HTMLBreakOnClosingTagParser() as parser:
503 parser.feed(html[whole_start:content_start])
504 if not parser.tagstack or parser.tagstack[0] != tag:
505 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
506 offset = content_start
507 while offset < len(html):
508 next_closing_tag_start = find_or_raise(
509 html[offset:], closing_tag,
510 compat_HTMLParseError(f'closing {tag} tag not found'))
511 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
512 try:
513 parser.feed(html[offset:offset + next_closing_tag_end])
514 offset += next_closing_tag_end
515 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
516 return html[content_start:offset + next_closing_tag_start], \
517 html[whole_start:offset + next_closing_tag_end]
518 raise compat_HTMLParseError('unexpected end of html')
519
520
521 class HTMLAttributeParser(html.parser.HTMLParser):
522 """Trivial HTML parser to gather the attributes for a single element"""
523
524 def __init__(self):
525 self.attrs = {}
526 html.parser.HTMLParser.__init__(self)
527
528 def handle_starttag(self, tag, attrs):
529 self.attrs = dict(attrs)
530 raise compat_HTMLParseError('done')
531
532
533 class HTMLListAttrsParser(html.parser.HTMLParser):
534 """HTML parser to gather the attributes for the elements of a list"""
535
536 def __init__(self):
537 html.parser.HTMLParser.__init__(self)
538 self.items = []
539 self._level = 0
540
541 def handle_starttag(self, tag, attrs):
542 if tag == 'li' and self._level == 0:
543 self.items.append(dict(attrs))
544 self._level += 1
545
546 def handle_endtag(self, tag):
547 self._level -= 1
548
549
550 def extract_attributes(html_element):
551 """Given a string for an HTML element such as
552 <el
553 a="foo" B="bar" c="&98;az" d=boz
554 empty= noval entity="&amp;"
555 sq='"' dq="'"
556 >
557 Decode and return a dictionary of attributes.
558 {
559 'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
560 'empty': '', 'noval': None, 'entity': '&',
561 'sq': '"', 'dq': '\''
562 }.
563 """
564 parser = HTMLAttributeParser()
565 with contextlib.suppress(compat_HTMLParseError):
566 parser.feed(html_element)
567 parser.close()
568 return parser.attrs
569
570
571 def parse_list(webpage):
572 """Given a string for an series of HTML <li> elements,
573 return a dictionary of their attributes"""
574 parser = HTMLListAttrsParser()
575 parser.feed(webpage)
576 parser.close()
577 return parser.items
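
# An illustrative sketch (the attributes are hypothetical):
#
#   >>> parse_list('<li data-x="1"></li><li data-x="2"></li>')
#   [{'data-x': '1'}, {'data-x': '2'}]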
578
579
580 def clean_html(html):
581 """Clean an HTML snippet into a readable string"""
582
583 if html is None: # Convenience for sanitizing descriptions etc.
584 return html
585
586 html = re.sub(r'\s+', ' ', html)
587 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
588 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
589 # Strip html tags
590 html = re.sub('<.*?>', '', html)
591 # Replace html entities
592 html = unescapeHTML(html)
593 return html.strip()
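
# An illustrative sketch:
#
#   >>> clean_html('<p>foo<br/>bar &amp; baz</p>')
#   'foo\nbar & baz'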
594
595
596 class LenientJSONDecoder(json.JSONDecoder):
597 def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
598 self.transform_source, self.ignore_extra = transform_source, ignore_extra
599 super().__init__(*args, **kwargs)
600
601 def decode(self, s):
602 if self.transform_source:
603 s = self.transform_source(s)
604 try:
605 if self.ignore_extra:
606 return self.raw_decode(s.lstrip())[0]
607 return super().decode(s)
608 except json.JSONDecodeError as e:
609 if e.pos is not None:
610 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
611 raise
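
# A minimal sketch of LenientJSONDecoder on input with trailing garbage
# (the JSON string is hypothetical):
#
#   >>> json.loads('{"a": 1}garbage', cls=LenientJSONDecoder, ignore_extra=True)
#   {'a': 1}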
612
613
614 def sanitize_open(filename, open_mode):
615 """Try to open the given filename, and slightly tweak it if this fails.
616
617 Attempts to open the given filename. If this fails, it tries to change
618 the filename slightly, step by step, until it's either able to open it
619 or it fails and raises a final exception, like the standard open()
620 function.
621
622 It returns the tuple (stream, definitive_file_name).
623 """
624 if filename == '-':
625 if sys.platform == 'win32':
626 import msvcrt
627
628 # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
629 with contextlib.suppress(io.UnsupportedOperation):
630 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
631 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
632
633 for attempt in range(2):
634 try:
635 try:
636 if sys.platform == 'win32':
637 # FIXME: An exclusive lock also locks the file from being read.
638 # Since Windows locks are mandatory, don't lock the file on Windows (for now).
639 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
640 raise LockingUnsupportedError()
641 stream = locked_file(filename, open_mode, block=False).__enter__()
642 except OSError:
643 stream = open(filename, open_mode)
644 return stream, filename
645 except OSError as err:
646 if attempt or err.errno in (errno.EACCES,):
647 raise
648 old_filename, filename = filename, sanitize_path(filename)
649 if old_filename == filename:
650 raise
651
652
653 def timeconvert(timestr):
654 """Convert RFC 2822 defined time string into system timestamp"""
655 timestamp = None
656 timetuple = email.utils.parsedate_tz(timestr)
657 if timetuple is not None:
658 timestamp = email.utils.mktime_tz(timetuple)
659 return timestamp
660
661
662 def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
663 """Sanitizes a string so it could be used as part of a filename.
664 @param restricted Use a stricter subset of allowed characters
665 @param is_id Whether this is an ID that should be kept unchanged if possible.
666 If unset, yt-dlp's new sanitization rules are in effect
667 """
668 if s == '':
669 return ''
670
671 def replace_insane(char):
672 if restricted and char in ACCENT_CHARS:
673 return ACCENT_CHARS[char]
674 elif not restricted and char == '\n':
675 return '\0 '
676 elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
677 # Replace with their full-width unicode counterparts
678 return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
679 elif char == '?' or ord(char) < 32 or ord(char) == 127:
680 return ''
681 elif char == '"':
682 return '' if restricted else '\''
683 elif char == ':':
684 return '\0_\0-' if restricted else '\0 \0-'
685 elif char in '\\/|*<>':
686 return '\0_'
687 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
688 return '\0_'
689 return char
690
691 # Replace look-alike Unicode glyphs
692 if restricted and (is_id is NO_DEFAULT or not is_id):
693 s = unicodedata.normalize('NFKC', s)
694 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
695 result = ''.join(map(replace_insane, s))
696 if is_id is NO_DEFAULT:
697 result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
698 STRIP_RE = r'(?:\0.|[ _-])*'
699 result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
700 result = result.replace('\0', '') or '_'
701
702 if not is_id:
703 while '__' in result:
704 result = result.replace('__', '_')
705 result = result.strip('_')
706 # Common case of "Foreign band name - English song title"
707 if restricted and result.startswith('-_'):
708 result = result[2:]
709 if result.startswith('-'):
710 result = '_' + result[len('-'):]
711 result = result.lstrip('.')
712 if not result:
713 result = '_'
714 return result
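
# An illustrative sketch: in restricted mode, path separators and other unsafe
# characters are replaced with substitutes such as '_' (the input is hypothetical):
#
#   >>> sanitize_filename('a/b|c', restricted=True)
#   'a_b_c'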
715
716
717 def sanitize_path(s, force=False):
718 """Sanitizes and normalizes path on Windows"""
719 if sys.platform == 'win32':
720 force = False
721 drive_or_unc, _ = os.path.splitdrive(s)
722 elif force:
723 drive_or_unc = ''
724 else:
725 return s
726
727 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
728 if drive_or_unc:
729 norm_path.pop(0)
730 sanitized_path = [
731 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
732 for path_part in norm_path]
733 if drive_or_unc:
734 sanitized_path.insert(0, drive_or_unc + os.path.sep)
735 elif force and s and s[0] == os.path.sep:
736 sanitized_path.insert(0, os.path.sep)
737 return os.path.join(*sanitized_path)
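
# A minimal sketch with force=True, which applies the Windows rules even on
# other platforms (output shown for a POSIX system):
#
#   >>> sanitize_path('abc/def?.txt', force=True)
#   'abc/def#.txt'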
738
739
740 def sanitize_url(url, *, scheme='http'):
741 # Prepend protocol-less URLs with the `http:` scheme in order to reduce
742 # the number of unwanted failures due to a missing protocol
743 if url is None:
744 return
745 elif url.startswith('//'):
746 return f'{scheme}:{url}'
747 # Fix some common typos seen so far
748 COMMON_TYPOS = (
749 # https://github.com/ytdl-org/youtube-dl/issues/15649
750 (r'^httpss://', r'https://'),
751 # https://bx1.be/lives/direct-tv/
752 (r'^rmtp([es]?)://', r'rtmp\1://'),
753 )
754 for mistake, fixup in COMMON_TYPOS:
755 if re.match(mistake, url):
756 return re.sub(mistake, fixup, url)
757 return url
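
# An illustrative sketch:
#
#   >>> sanitize_url('//example.com/video')
#   'http://example.com/video'
#   >>> sanitize_url('httpss://example.com')
#   'https://example.com'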
758
759
760 def extract_basic_auth(url):
761 parts = urllib.parse.urlsplit(url)
762 if parts.username is None:
763 return url, None
764 url = urllib.parse.urlunsplit(parts._replace(netloc=(
765 parts.hostname if parts.port is None
766 else '%s:%d' % (parts.hostname, parts.port))))
767 auth_payload = base64.b64encode(
768 ('%s:%s' % (parts.username, parts.password or '')).encode())
769 return url, f'Basic {auth_payload.decode()}'
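
# An illustrative sketch (the credentials are hypothetical):
#
#   >>> extract_basic_auth('http://user:pass@example.com/')
#   ('http://example.com/', 'Basic dXNlcjpwYXNz')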
770
771
772 def sanitized_Request(url, *args, **kwargs):
773 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
774 if auth_header is not None:
775 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
776 headers['Authorization'] = auth_header
777 return urllib.request.Request(url, *args, **kwargs)
778
779
780 def expand_path(s):
781 """Expand shell variables and ~"""
782 return os.path.expandvars(compat_expanduser(s))
783
784
785 def orderedSet(iterable, *, lazy=False):
786 """Remove all duplicates from the input iterable"""
787 def _iter():
788 seen = [] # Do not use set since the items can be unhashable
789 for x in iterable:
790 if x not in seen:
791 seen.append(x)
792 yield x
793
794 return _iter() if lazy else list(_iter())
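
# An illustrative sketch: first-seen order is preserved, and unhashable items
# (e.g. dicts) are supported:
#
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]
#   >>> orderedSet([{'a': 1}, {'a': 1}])
#   [{'a': 1}]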
795
796
797 def _htmlentity_transform(entity_with_semicolon):
798 """Transforms an HTML entity to a character."""
799 entity = entity_with_semicolon[:-1]
800
801 # Known non-numeric HTML entity
802 if entity in html.entities.name2codepoint:
803 return chr(html.entities.name2codepoint[entity])
804
805 # TODO: HTML5 allows entities without a semicolon.
806 # E.g. '&Eacuteric' should be decoded as 'Éric'.
807 if entity_with_semicolon in html.entities.html5:
808 return html.entities.html5[entity_with_semicolon]
809
810 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
811 if mobj is not None:
812 numstr = mobj.group(1)
813 if numstr.startswith('x'):
814 base = 16
815 numstr = '0%s' % numstr
816 else:
817 base = 10
818 # See https://github.com/ytdl-org/youtube-dl/issues/7518
819 with contextlib.suppress(ValueError):
820 return chr(int(numstr, base))
821
822 # Unknown entity in name, return its literal representation
823 return '&%s;' % entity
824
825
826 def unescapeHTML(s):
827 if s is None:
828 return None
829 assert isinstance(s, str)
830
831 return re.sub(
832 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
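
# An illustrative sketch covering both named and numeric entities:
#
#   >>> unescapeHTML('Paul &amp; Mary &#x27;live&#x27;')
#   "Paul & Mary 'live'"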
833
834
835 def escapeHTML(text):
836 return (
837 text
838 .replace('&', '&amp;')
839 .replace('<', '&lt;')
840 .replace('>', '&gt;')
841 .replace('"', '&quot;')
842 .replace("'", '&#39;')
843 )
844
845
846 def process_communicate_or_kill(p, *args, **kwargs):
847 deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
848 f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
849 return Popen.communicate_or_kill(p, *args, **kwargs)
850
851
852 class Popen(subprocess.Popen):
853 if sys.platform == 'win32':
854 _startupinfo = subprocess.STARTUPINFO()
855 _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
856 else:
857 _startupinfo = None
858
859 @staticmethod
860 def _fix_pyinstaller_ld_path(env):
861 """Restore LD_LIBRARY_PATH when using PyInstaller
862 Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
863 https://github.com/yt-dlp/yt-dlp/issues/4573
864 """
865 if not hasattr(sys, '_MEIPASS'):
866 return
867
868 def _fix(key):
869 orig = env.get(f'{key}_ORIG')
870 if orig is None:
871 env.pop(key, None)
872 else:
873 env[key] = orig
874
875 _fix('LD_LIBRARY_PATH') # Linux
876 _fix('DYLD_LIBRARY_PATH') # macOS
877
878 def __init__(self, *args, env=None, text=False, **kwargs):
879 if env is None:
880 env = os.environ.copy()
881 self._fix_pyinstaller_ld_path(env)
882
883 if text is True:
884 kwargs['universal_newlines'] = True # For 3.6 compatibility
885 kwargs.setdefault('encoding', 'utf-8')
886 kwargs.setdefault('errors', 'replace')
887 super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
888
889 def communicate_or_kill(self, *args, **kwargs):
890 try:
891 return self.communicate(*args, **kwargs)
892 except BaseException: # Including KeyboardInterrupt
893 self.kill(timeout=None)
894 raise
895
896 def kill(self, *, timeout=0):
897 super().kill()
898 if timeout != 0:
899 self.wait(timeout=timeout)
900
901 @classmethod
902 def run(cls, *args, timeout=None, **kwargs):
903 with cls(*args, **kwargs) as proc:
904 default = '' if proc.text_mode else b''
905 stdout, stderr = proc.communicate_or_kill(timeout=timeout)
906 return stdout or default, stderr or default, proc.returncode
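
# A minimal usage sketch of Popen.run (the command is hypothetical):
#
#   stdout, stderr, returncode = Popen.run(
#       ['ffprobe', '-version'], text=True,
#       stdout=subprocess.PIPE, stderr=subprocess.PIPE)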
907
908
909 def get_subprocess_encoding():
910 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
911 # For subprocess calls, encode with locale encoding
912 # Refer to http://stackoverflow.com/a/9951851/35070
913 encoding = preferredencoding()
914 else:
915 encoding = sys.getfilesystemencoding()
916 if encoding is None:
917 encoding = 'utf-8'
918 return encoding
919
920
921 def encodeFilename(s, for_subprocess=False):
922 assert isinstance(s, str)
923 return s
924
925
926 def decodeFilename(b, for_subprocess=False):
927 return b
928
929
930 def encodeArgument(s):
931 # Legacy code that uses byte strings
932 # Uncomment the following line after fixing all post processors
933 # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
934 return s if isinstance(s, str) else s.decode('ascii')
935
936
937 def decodeArgument(b):
938 return b
939
940
941 def decodeOption(optval):
942 if optval is None:
943 return optval
944 if isinstance(optval, bytes):
945 optval = optval.decode(preferredencoding())
946
947 assert isinstance(optval, str)
948 return optval
949
950
951 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
952
953
954 def timetuple_from_msec(msec):
955 secs, msec = divmod(msec, 1000)
956 mins, secs = divmod(secs, 60)
957 hrs, mins = divmod(mins, 60)
958 return _timetuple(hrs, mins, secs, msec)
959
960
961 def formatSeconds(secs, delim=':', msec=False):
962 time = timetuple_from_msec(secs * 1000)
963 if time.hours:
964 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
965 elif time.minutes:
966 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
967 else:
968 ret = '%d' % time.seconds
969 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
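
# An illustrative sketch of the two helpers above:
#
#   >>> timetuple_from_msec(3723500)
#   Time(hours=1, minutes=2, seconds=3, milliseconds=500)
#   >>> formatSeconds(3723, msec=True)
#   '1:02:03.000'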
970
971
972 def _ssl_load_windows_store_certs(ssl_context, storename):
973 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
974 try:
975 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
976 if encoding == 'x509_asn' and (
977 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
978 except PermissionError:
979 return
980 for cert in certs:
981 with contextlib.suppress(ssl.SSLError):
982 ssl_context.load_verify_locations(cadata=cert)
983
984
985 def make_HTTPS_handler(params, **kwargs):
986 opts_check_certificate = not params.get('nocheckcertificate')
987 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
988 context.check_hostname = opts_check_certificate
989 if params.get('legacyserverconnect'):
990 context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
991 # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
992 context.set_ciphers('DEFAULT')
993 elif (
994 sys.version_info < (3, 10)
995 and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
996 and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
997 ):
998 # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
999 # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
1000 # in some situations [2][3].
1001 # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
1002 # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
1003 # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
1004 # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
1005 # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
1006 # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
1007 # 4. https://peps.python.org/pep-0644/
1008 # 5. https://peps.python.org/pep-0644/#libressl-support
1009 # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
1010 context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
1011 context.minimum_version = ssl.TLSVersion.TLSv1_2
1012
1013 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
1014 if opts_check_certificate:
1015 if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
1016 context.load_verify_locations(cafile=certifi.where())
1017 else:
1018 try:
1019 context.load_default_certs()
1020 # Work around the issue in load_default_certs when there are bad certificates. See:
1021 # https://github.com/yt-dlp/yt-dlp/issues/1060,
1022 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
1023 except ssl.SSLError:
1024 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
1025 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
1026 for storename in ('CA', 'ROOT'):
1027 _ssl_load_windows_store_certs(context, storename)
1028 context.set_default_verify_paths()
1029
1030 client_certfile = params.get('client_certificate')
1031 if client_certfile:
1032 try:
1033 context.load_cert_chain(
1034 client_certfile, keyfile=params.get('client_certificate_key'),
1035 password=params.get('client_certificate_password'))
1036 except ssl.SSLError:
1037 raise YoutubeDLError('Unable to load client certificate')
1038
1039 # Some servers may reject requests if ALPN extension is not sent. See:
1040 # https://github.com/python/cpython/issues/85140
1041 # https://github.com/yt-dlp/yt-dlp/issues/3878
1042 with contextlib.suppress(NotImplementedError):
1043 context.set_alpn_protocols(['http/1.1'])
1044
1045 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
1046
1047
1048 def bug_reports_message(before=';'):
1049 from .update import REPOSITORY
1050
1051 msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
1052 'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
1053
1054 before = before.rstrip()
1055 if not before or before.endswith(('.', '!', '?')):
1056 msg = msg[0].title() + msg[1:]
1057
1058 return (before + ' ' if before else '') + msg
1059
1060
1061 class YoutubeDLError(Exception):
1062 """Base exception for YoutubeDL errors."""
1063 msg = None
1064
1065 def __init__(self, msg=None):
1066 if msg is not None:
1067 self.msg = msg
1068 elif self.msg is None:
1069 self.msg = type(self).__name__
1070 super().__init__(self.msg)
1071
1072
1073 network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
1074 if hasattr(ssl, 'CertificateError'):
1075 network_exceptions.append(ssl.CertificateError)
1076 network_exceptions = tuple(network_exceptions)
1077
1078
1079 class ExtractorError(YoutubeDLError):
1080 """Error during info extraction."""
1081
1082 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
1083 """ tb, if given, is the original traceback (so that it can be printed out).
1084 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1085 """
1086 if sys.exc_info()[0] in network_exceptions:
1087 expected = True
1088
1089 self.orig_msg = str(msg)
1090 self.traceback = tb
1091 self.expected = expected
1092 self.cause = cause
1093 self.video_id = video_id
1094 self.ie = ie
1095 self.exc_info = sys.exc_info() # preserve original exception
1096 if isinstance(self.exc_info[1], ExtractorError):
1097 self.exc_info = self.exc_info[1].exc_info
1098 super().__init__(self.__msg)
1099
1100 @property
1101 def __msg(self):
1102 return ''.join((
1103 format_field(self.ie, None, '[%s] '),
1104 format_field(self.video_id, None, '%s: '),
1105 self.orig_msg,
1106 format_field(self.cause, None, ' (caused by %r)'),
1107 '' if self.expected else bug_reports_message()))
1108
1109 def format_traceback(self):
1110 return join_nonempty(
1111 self.traceback and ''.join(traceback.format_tb(self.traceback)),
1112 self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1113 delim='\n') or None
1114
1115 def __setattr__(self, name, value):
1116 super().__setattr__(name, value)
1117 if getattr(self, 'msg', None) and name not in ('msg', 'args'):
1118 self.msg = self.__msg or type(self).__name__
1119 self.args = (self.msg, ) # Cannot be property
1120
1121
1122 class UnsupportedError(ExtractorError):
1123 def __init__(self, url):
1124 super().__init__(
1125 'Unsupported URL: %s' % url, expected=True)
1126 self.url = url
1127
1128
1129 class RegexNotFoundError(ExtractorError):
1130 """Error when a regex didn't match"""
1131 pass
1132
1133
1134 class GeoRestrictedError(ExtractorError):
1135 """Geographic restriction Error exception.
1136
1137 This exception may be thrown when a video is not available from your
1138 geographic location due to geographic restrictions imposed by a website.
1139 """
1140
1141 def __init__(self, msg, countries=None, **kwargs):
1142 kwargs['expected'] = True
1143 super().__init__(msg, **kwargs)
1144 self.countries = countries
1145
1146
1147 class UserNotLive(ExtractorError):
1148 """Error when a channel/user is not live"""
1149
1150 def __init__(self, msg=None, **kwargs):
1151 kwargs['expected'] = True
1152 super().__init__(msg or 'The channel is not currently live', **kwargs)
1153
1154
1155 class DownloadError(YoutubeDLError):
1156 """Download Error exception.
1157
1158 This exception may be thrown by FileDownloader objects if they are not
1159 configured to continue on errors. They will contain the appropriate
1160 error message.
1161 """
1162
1163 def __init__(self, msg, exc_info=None):
1164 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1165 super().__init__(msg)
1166 self.exc_info = exc_info
1167
1168
1169 class EntryNotInPlaylist(YoutubeDLError):
1170 """Entry not in playlist exception.
1171
1172 This exception will be thrown by YoutubeDL when a requested entry
1173 is not found in the playlist info_dict
1174 """
1175 msg = 'Entry not found in info'
1176
1177
1178 class SameFileError(YoutubeDLError):
1179 """Same File exception.
1180
1181 This exception will be thrown by FileDownloader objects if they detect
1182 multiple files would have to be downloaded to the same file on disk.
1183 """
1184 msg = 'Fixed output name but more than one file to download'
1185
1186 def __init__(self, filename=None):
1187 if filename is not None:
1188 self.msg += f': {filename}'
1189 super().__init__(self.msg)
1190
1191
1192 class PostProcessingError(YoutubeDLError):
1193 """Post Processing exception.
1194
1195 This exception may be raised by PostProcessor's .run() method to
1196 indicate an error in the postprocessing task.
1197 """
1198
1199
1200 class DownloadCancelled(YoutubeDLError):
1201 """ Exception raised when the download queue should be interrupted """
1202 msg = 'The download was cancelled'
1203
1204
1205 class ExistingVideoReached(DownloadCancelled):
1206 """ --break-on-existing triggered """
1207 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1208
1209
1210 class RejectedVideoReached(DownloadCancelled):
1211 """ --break-on-reject triggered """
1212 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1213
1214
1215 class MaxDownloadsReached(DownloadCancelled):
1216 """ --max-downloads limit has been reached. """
1217 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1218
1219
1220 class ReExtractInfo(YoutubeDLError):
1221 """ Video info needs to be re-extracted. """
1222
1223 def __init__(self, msg, expected=False):
1224 super().__init__(msg)
1225 self.expected = expected
1226
1227
1228 class ThrottledDownload(ReExtractInfo):
1229 """ Download speed below --throttled-rate. """
1230 msg = 'The download speed is below throttle limit'
1231
1232 def __init__(self):
1233 super().__init__(self.msg, expected=False)
1234
1235
1236 class UnavailableVideoError(YoutubeDLError):
1237 """Unavailable Format exception.
1238
1239 This exception will be thrown when a video is requested
1240 in a format that is not available for that video.
1241 """
1242 msg = 'Unable to download video'
1243
1244 def __init__(self, err=None):
1245 if err is not None:
1246 self.msg += f': {err}'
1247 super().__init__(self.msg)
1248
1249
1250 class ContentTooShortError(YoutubeDLError):
1251 """Content Too Short exception.
1252
1253 This exception may be raised by FileDownloader objects when a file they
1254 download is too small for what the server announced first, indicating
1255 the connection was probably interrupted.
1256 """
1257
1258 def __init__(self, downloaded, expected):
1259 super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1260 # Both in bytes
1261 self.downloaded = downloaded
1262 self.expected = expected
1263
1264
1265 class XAttrMetadataError(YoutubeDLError):
1266 def __init__(self, code=None, msg='Unknown error'):
1267 super().__init__(msg)
1268 self.code = code
1269 self.msg = msg
1270
1271 # Parsing code and msg
1272 if (self.code in (errno.ENOSPC, errno.EDQUOT)
1273 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
1274 self.reason = 'NO_SPACE'
1275 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
1276 self.reason = 'VALUE_TOO_LONG'
1277 else:
1278 self.reason = 'NOT_SUPPORTED'
1279
1280
1281 class XAttrUnavailableError(YoutubeDLError):
1282 pass
1283
1284
1285 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1286 hc = http_class(*args, **kwargs)
1287 source_address = ydl_handler._params.get('source_address')
1288
1289 if source_address is not None:
1290 # This works around _create_connection() from socket, which tries all
1291 # address data from getaddrinfo() including IPv6. This filters the results from
1292 # getaddrinfo() based on the source_address value.
1293 # This is based on the cpython socket.create_connection() function.
1294 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1295 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1296 host, port = address
1297 err = None
1298 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1299 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1300 ip_addrs = [addr for addr in addrs if addr[0] == af]
1301 if addrs and not ip_addrs:
1302 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1303 raise OSError(
1304 "No remote IP%s addresses available for connect, can't use '%s' as source address"
1305 % (ip_version, source_address[0]))
1306 for res in ip_addrs:
1307 af, socktype, proto, canonname, sa = res
1308 sock = None
1309 try:
1310 sock = socket.socket(af, socktype, proto)
1311 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1312 sock.settimeout(timeout)
1313 sock.bind(source_address)
1314 sock.connect(sa)
1315 err = None # Explicitly break reference cycle
1316 return sock
1317 except OSError as _:
1318 err = _
1319 if sock is not None:
1320 sock.close()
1321 if err is not None:
1322 raise err
1323 else:
1324 raise OSError('getaddrinfo returns an empty list')
1325 if hasattr(hc, '_create_connection'):
1326 hc._create_connection = _create_connection
1327 hc.source_address = (source_address, 0)
1328
1329 return hc
1330
1331
1332 def handle_youtubedl_headers(headers):
1333 filtered_headers = headers
1334
1335 if 'Youtubedl-no-compression' in filtered_headers:
1336 filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
1337 del filtered_headers['Youtubedl-no-compression']
1338
1339 return filtered_headers
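
# An illustrative sketch (the header values are hypothetical):
#
#   >>> handle_youtubedl_headers({'Youtubedl-no-compression': '1', 'Accept-Encoding': 'gzip', 'User-Agent': 'UA'})
#   {'User-Agent': 'UA'}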
1340
1341
1342 class YoutubeDLHandler(urllib.request.HTTPHandler):
1343 """Handler for HTTP requests and responses.
1344
1345 This class, when installed with an OpenerDirector, automatically adds
1346 the standard headers to every HTTP request and handles gzipped and
1347 deflated responses from web servers. If compression is to be avoided in
1348 a particular request, the original request in the program code only has
1349 to include the HTTP header "Youtubedl-no-compression", which will be
1350 removed before making the real request.
1351
1352 Part of this code was copied from:
1353
1354 http://techknack.net/python-urllib2-handlers/
1355
1356 Andrew Rowls, the author of that code, agreed to release it to the
1357 public domain.
1358 """
1359
1360 def __init__(self, params, *args, **kwargs):
1361 urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
1362 self._params = params
1363
1364 def http_open(self, req):
1365 conn_class = http.client.HTTPConnection
1366
1367 socks_proxy = req.headers.get('Ytdl-socks-proxy')
1368 if socks_proxy:
1369 conn_class = make_socks_conn_class(conn_class, socks_proxy)
1370 del req.headers['Ytdl-socks-proxy']
1371
1372 return self.do_open(functools.partial(
1373 _create_http_connection, self, conn_class, False),
1374 req)
1375
1376 @staticmethod
1377 def deflate(data):
1378 if not data:
1379 return data
1380 try:
1381 return zlib.decompress(data, -zlib.MAX_WBITS)
1382 except zlib.error:
1383 return zlib.decompress(data)
1384
1385 @staticmethod
1386 def brotli(data):
1387 if not data:
1388 return data
1389 return brotli.decompress(data)
1390
1391 def http_request(self, req):
1392 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
1393 # always respected by websites - some tend to give out URLs with non-percent-encoded
1394 # non-ASCII characters (see telemb.py, ard.py [#3412])
1395 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1396 # To work around aforementioned issue we will replace request's original URL with
1397 # percent-encoded one
1398 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1399 # the code of this workaround has been moved here from YoutubeDL.urlopen()
1400 url = req.get_full_url()
1401 url_escaped = escape_url(url)
1402
1403 # Substitute URL if any change after escaping
1404 if url != url_escaped:
1405 req = update_Request(req, url=url_escaped)
1406
1407 for h, v in self._params.get('http_headers', std_headers).items():
1408 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
1409 # The dict keys are capitalized by urllib because of this bug
1410 if h.capitalize() not in req.headers:
1411 req.add_header(h, v)
1412
1413 if 'Accept-encoding' not in req.headers:
1414 req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
1415
1416 req.headers = handle_youtubedl_headers(req.headers)
1417
1418 return super().do_request_(req)
1419
1420 def http_response(self, req, resp):
1421 old_resp = resp
1422 # gzip
1423 if resp.headers.get('Content-encoding', '') == 'gzip':
1424 content = resp.read()
1425 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
1426 try:
1427 uncompressed = io.BytesIO(gz.read())
1428 except OSError as original_ioerror:
1429 # There may be junk at the end of the file
1430 # See http://stackoverflow.com/q/4928560/35070 for details
1431 for i in range(1, 1024):
1432 try:
1433 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
1434 uncompressed = io.BytesIO(gz.read())
1435 except OSError:
1436 continue
1437 break
1438 else:
1439 raise original_ioerror
1440 resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
1441 resp.msg = old_resp.msg
1442 del resp.headers['Content-encoding']
1443 # deflate
1444 if resp.headers.get('Content-encoding', '') == 'deflate':
1445 gz = io.BytesIO(self.deflate(resp.read()))
1446 resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
1447 resp.msg = old_resp.msg
1448 del resp.headers['Content-encoding']
1449 # brotli
1450 if resp.headers.get('Content-encoding', '') == 'br':
1451 resp = urllib.request.addinfourl(
1452 io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
1453 resp.msg = old_resp.msg
1454 del resp.headers['Content-encoding']
1455 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1456 # https://github.com/ytdl-org/youtube-dl/issues/6457).
1457 if 300 <= resp.code < 400:
1458 location = resp.headers.get('Location')
1459 if location:
1460 # Per RFC 2616, the default charset is iso-8859-1, which Python 3 respects
1461 location = location.encode('iso-8859-1').decode()
1462 location_escaped = escape_url(location)
1463 if location != location_escaped:
1464 del resp.headers['Location']
1465 resp.headers['Location'] = location_escaped
1466 return resp
1467
1468 https_request = http_request
1469 https_response = http_response
1470
1471
1472 def make_socks_conn_class(base_class, socks_proxy):
1473 assert issubclass(base_class, (
1474 http.client.HTTPConnection, http.client.HTTPSConnection))
1475
1476 url_components = urllib.parse.urlparse(socks_proxy)
1477 if url_components.scheme.lower() == 'socks5':
1478 socks_type = ProxyType.SOCKS5
1479 elif url_components.scheme.lower() in ('socks', 'socks4'):
1480 socks_type = ProxyType.SOCKS4
1481 elif url_components.scheme.lower() == 'socks4a':
1482 socks_type = ProxyType.SOCKS4A
1483
1484 def unquote_if_non_empty(s):
1485 if not s:
1486 return s
1487 return urllib.parse.unquote_plus(s)
1488
1489 proxy_args = (
1490 socks_type,
1491 url_components.hostname, url_components.port or 1080,
1492 True, # Remote DNS
1493 unquote_if_non_empty(url_components.username),
1494 unquote_if_non_empty(url_components.password),
1495 )
1496
1497 class SocksConnection(base_class):
1498 def connect(self):
1499 self.sock = sockssocket()
1500 self.sock.setproxy(*proxy_args)
1501 if isinstance(self.timeout, (int, float)):
1502 self.sock.settimeout(self.timeout)
1503 self.sock.connect((self.host, self.port))
1504
1505 if isinstance(self, http.client.HTTPSConnection):
1506 if hasattr(self, '_context'): # Python > 2.6
1507 self.sock = self._context.wrap_socket(
1508 self.sock, server_hostname=self.host)
1509 else:
1510 self.sock = ssl.wrap_socket(self.sock)
1511
1512 return SocksConnection
1513
1514
1515 class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
1516 def __init__(self, params, https_conn_class=None, *args, **kwargs):
1517 urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
1518 self._https_conn_class = https_conn_class or http.client.HTTPSConnection
1519 self._params = params
1520
1521 def https_open(self, req):
1522 kwargs = {}
1523 conn_class = self._https_conn_class
1524
1525 if hasattr(self, '_context'): # python > 2.6
1526 kwargs['context'] = self._context
1527 if hasattr(self, '_check_hostname'): # python 3.x
1528 kwargs['check_hostname'] = self._check_hostname
1529
1530 socks_proxy = req.headers.get('Ytdl-socks-proxy')
1531 if socks_proxy:
1532 conn_class = make_socks_conn_class(conn_class, socks_proxy)
1533 del req.headers['Ytdl-socks-proxy']
1534
1535 try:
1536 return self.do_open(
1537 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
1538 except urllib.error.URLError as e:
1539 if (isinstance(e.reason, ssl.SSLError)
1540 and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1541 raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
1542 raise
1543
1544
1545 def is_path_like(f):
1546 return isinstance(f, (str, bytes, os.PathLike))
1547
1548
1549 class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1550 """
1551 See [1] for cookie file format.
1552
1553 1. https://curl.haxx.se/docs/http-cookies.html
1554 """
1555 _HTTPONLY_PREFIX = '#HttpOnly_'
1556 _ENTRY_LEN = 7
1557 _HEADER = '''# Netscape HTTP Cookie File
1558 # This file is generated by yt-dlp. Do not edit.
1559
1560 '''
1561 _CookieFileEntry = collections.namedtuple(
1562 'CookieFileEntry',
1563 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
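
# Each data line in a cookies file consists of the seven tab-separated fields
# above, e.g. (an illustrative, hypothetical entry, with <TAB> for a literal tab):
# .example.com<TAB>TRUE<TAB>/<TAB>FALSE<TAB>0<TAB>session_id<TAB>abcdef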
1564
1565 def __init__(self, filename=None, *args, **kwargs):
1566 super().__init__(None, *args, **kwargs)
1567 if is_path_like(filename):
1568 filename = os.fspath(filename)
1569 self.filename = filename
1570
1571 @staticmethod
1572 def _true_or_false(cndn):
1573 return 'TRUE' if cndn else 'FALSE'
1574
1575 @contextlib.contextmanager
1576 def open(self, file, *, write=False):
1577 if is_path_like(file):
1578 with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1579 yield f
1580 else:
1581 if write:
1582 file.truncate(0)
1583 yield file
1584
1585 def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1586 now = time.time()
1587 for cookie in self:
1588 if (not ignore_discard and cookie.discard
1589 or not ignore_expires and cookie.is_expired(now)):
1590 continue
1591 name, value = cookie.name, cookie.value
1592 if value is None:
1593 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1594 # with no name, whereas http.cookiejar regards it as a
1595 # cookie with no value.
1596 name, value = '', name
1597 f.write('%s\n' % '\t'.join((
1598 cookie.domain,
1599 self._true_or_false(cookie.domain.startswith('.')),
1600 cookie.path,
1601 self._true_or_false(cookie.secure),
1602 str_or_none(cookie.expires, default=''),
1603 name, value
1604 )))
1605
1606 def save(self, filename=None, *args, **kwargs):
1607 """
1608 Save cookies to a file.
1609 Code is taken from CPython 3.6
1610 https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1611
1612 if filename is None:
1613 if self.filename is not None:
1614 filename = self.filename
1615 else:
1616 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1617
1618 # Store session cookies with `expires` set to 0 instead of an empty string
1619 for cookie in self:
1620 if cookie.expires is None:
1621 cookie.expires = 0
1622
1623 with self.open(filename, write=True) as f:
1624 f.write(self._HEADER)
1625 self._really_save(f, *args, **kwargs)
1626
1627 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1628 """Load cookies from a file."""
1629 if filename is None:
1630 if self.filename is not None:
1631 filename = self.filename
1632 else:
1633 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1634
1635 def prepare_line(line):
1636 if line.startswith(self._HTTPONLY_PREFIX):
1637 line = line[len(self._HTTPONLY_PREFIX):]
1638 # comments and empty lines are fine
1639 if line.startswith('#') or not line.strip():
1640 return line
1641 cookie_list = line.split('\t')
1642 if len(cookie_list) != self._ENTRY_LEN:
1643 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1644 cookie = self._CookieFileEntry(*cookie_list)
1645 if cookie.expires_at and not cookie.expires_at.isdigit():
1646 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1647 return line
1648
1649 cf = io.StringIO()
1650 with self.open(filename) as f:
1651 for line in f:
1652 try:
1653 cf.write(prepare_line(line))
1654 except http.cookiejar.LoadError as e:
1655 if f'{line.strip()} '[0] in '[{"':
1656 raise http.cookiejar.LoadError(
1657 'Cookies file must be Netscape formatted, not JSON. See '
1658 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1659 write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1660 continue
1661 cf.seek(0)
1662 self._really_load(cf, filename, ignore_discard, ignore_expires)
1663 # Session cookies are denoted by the `expires` field set to either
1664 # an empty string or 0. MozillaCookieJar only recognizes the former
1665 # (see [1]), so we need to force the latter to be recognized as session
1666 # cookies on our own.
1667 # Session cookies may be important for cookie-based authentication;
1668 # e.g. when a user does not check the 'Remember me' box while logging
1669 # in on a site, some important cookies are stored as session cookies,
1670 # and failing to recognize them will result in a failed login.
1671 # 1. https://bugs.python.org/issue17164
1672 for cookie in self:
1673 # Treat `expires=0` cookies as session cookies
1674 if cookie.expires == 0:
1675 cookie.expires = None
1676 cookie.discard = True
1677
1678
1679 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
1680 def __init__(self, cookiejar=None):
1681 urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
1682
1683 def http_response(self, request, response):
1684 return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
1685
1686 https_request = urllib.request.HTTPCookieProcessor.http_request
1687 https_response = http_response
1688
1689
1690 class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
1691 """YoutubeDL redirect handler
1692
1693 The code is based on HTTPRedirectHandler implementation from CPython [1].
1694
1695 This redirect handler solves two issues:
1696 - ensures redirect URL is always unicode under python 2
1697 - introduces support for the (now standardized) HTTP response status code
1698 308 Permanent Redirect [2] used by some sites [3]
1699
1700 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1701 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1702 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1703 """
1704
1705 http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
1706
1707 def redirect_request(self, req, fp, code, msg, headers, newurl):
1708 """Return a Request or None in response to a redirect.
1709
1710 This is called by the http_error_30x methods when a
1711 redirection response is received. If a redirection should
1712 take place, return a new Request to allow http_error_30x to
1713 perform the redirect. Otherwise, raise HTTPError if no-one
1714 else should try to handle this url. Return None if you can't
1715 but another Handler might.
1716 """
1717 m = req.get_method()
1718 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1719 or code in (301, 302, 303) and m == "POST")):
1720 raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
1721 # Strictly (according to RFC 2616), 301 or 302 in response to
1722 # a POST MUST NOT cause a redirection without confirmation
1723 # from the user (of urllib.request, in this case). In practice,
1724 # essentially all clients do redirect in this case, so we do
1725 # the same.
1726
1727 # Be conciliant with URIs containing a space. This is mainly
1728 # redundant with the more complete encoding done in http_error_302(),
1729 # but it is kept for compatibility with other callers.
1730 newurl = newurl.replace(' ', '%20')
1731
1732 CONTENT_HEADERS = ("content-length", "content-type")
1733 # Strip the content headers, since the redirected request may not carry the original body
1734 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1735
1736 # A 303 must either use GET or HEAD for subsequent request
1737 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1738 if code == 303 and m != 'HEAD':
1739 m = 'GET'
1740 # 301 and 302 redirects are commonly turned into a GET from a POST
1741 # for subsequent requests by browsers, so we'll do the same.
1742 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1743 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1744 if code in (301, 302) and m == 'POST':
1745 m = 'GET'
1746
1747 return urllib.request.Request(
1748 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1749 unverifiable=True, method=m)
1750
1751
1752 def extract_timezone(date_str):
1753 m = re.search(
1754 r'''(?x)
1755 ^.{8,}? # >=8 char non-TZ prefix, if present
1756 (?P<tz>Z| # just the UTC Z, or
1757 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1758 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1759 [ ]? # optional space
1760 (?P<sign>\+|-) # +/-
1761 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1762 $)
1763 ''', date_str)
1764 if not m:
1765 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1766 timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
1767 if timezone is not None:
1768 date_str = date_str[:-len(m.group('tz'))]
1769 timezone = datetime.timedelta(hours=timezone or 0)
1770 else:
1771 date_str = date_str[:-len(m.group('tz'))]
1772 if not m.group('sign'):
1773 timezone = datetime.timedelta()
1774 else:
1775 sign = 1 if m.group('sign') == '+' else -1
1776 timezone = datetime.timedelta(
1777 hours=sign * int(m.group('hours')),
1778 minutes=sign * int(m.group('minutes')))
1779 return timezone, date_str
1780
1781
1782 def parse_iso8601(date_str, delimiter='T', timezone=None):
1783 """ Return a UNIX timestamp from the given date """
1784
1785 if date_str is None:
1786 return None
1787
1788 date_str = re.sub(r'\.[0-9]+', '', date_str)
1789
1790 if timezone is None:
1791 timezone, date_str = extract_timezone(date_str)
1792
1793 with contextlib.suppress(ValueError):
1794 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1795 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1796 return calendar.timegm(dt.timetuple())
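
# Editor's illustration (not in the original source):
#   parse_iso8601('2023-01-15T10:00:00Z')  # -> 1673776800
#   parse_iso8601('2023-01-15T10:00:00+02:00')  # -> 1673769600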
1797
1798
1799 def date_formats(day_first=True):
1800 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1801
1802
1803 def unified_strdate(date_str, day_first=True):
1804 """Return a string with the date in the format YYYYMMDD"""
1805
1806 if date_str is None:
1807 return None
1808 upload_date = None
1809 # Replace commas
1810 date_str = date_str.replace(',', ' ')
1811 # Remove AM/PM + timezone
1812 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1813 _, date_str = extract_timezone(date_str)
1814
1815 for expression in date_formats(day_first):
1816 with contextlib.suppress(ValueError):
1817 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1818 if upload_date is None:
1819 timetuple = email.utils.parsedate_tz(date_str)
1820 if timetuple:
1821 with contextlib.suppress(ValueError):
1822 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1823 if upload_date is not None:
1824 return str(upload_date)
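
# Editor's illustration (not in the original source; the output assumes the
# DATE_FORMATS tables defined earlier in this module):
#   unified_strdate('Dec 14, 2012')  # -> '20121214'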
1825
1826
1827 def unified_timestamp(date_str, day_first=True):
1828 if date_str is None:
1829 return None
1830
1831 date_str = re.sub(r'\s+', ' ', re.sub(
1832 r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
1833
1834 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1835 timezone, date_str = extract_timezone(date_str)
1836
1837 # Remove AM/PM + timezone
1838 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1839
1840 # Remove unrecognized timezones from ISO 8601-like timestamps
1841 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1842 if m:
1843 date_str = date_str[:-len(m.group('tz'))]
1844
1845 # Python only supports microseconds, so remove nanoseconds
1846 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1847 if m:
1848 date_str = m.group(1)
1849
1850 for expression in date_formats(day_first):
1851 with contextlib.suppress(ValueError):
1852 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1853 return calendar.timegm(dt.timetuple())
1854
1855 timetuple = email.utils.parsedate_tz(date_str)
1856 if timetuple:
1857 return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
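
# Editor's illustration (not in the original source; assumes the
# TIMEZONE_NAMES and DATE_FORMATS tables defined earlier in this module):
#   unified_timestamp('2023-01-15 10:00:00 UTC')  # -> 1673776800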
1858
1859
1860 def determine_ext(url, default_ext='unknown_video'):
1861 if url is None or '.' not in url:
1862 return default_ext
1863 guess = url.partition('?')[0].rpartition('.')[2]
1864 if re.match(r'^[A-Za-z0-9]+$', guess):
1865 return guess
1866 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
1867 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1868 return guess.rstrip('/')
1869 else:
1870 return default_ext
1871
1872
1873 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1874 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1875
1876
1877 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1878 R"""
1879 Return a datetime object from a string.
1880 Supported format:
1881 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1882
1883 @param format strftime format of DATE
1884 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1885 auto: round to the unit provided in date_str (if applicable).
1886 """
1887 auto_precision = False
1888 if precision == 'auto':
1889 auto_precision = True
1890 precision = 'microsecond'
1891 today = datetime_round(datetime.datetime.utcnow(), precision)
1892 if date_str in ('now', 'today'):
1893 return today
1894 if date_str == 'yesterday':
1895 return today - datetime.timedelta(days=1)
1896 match = re.match(
1897 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1898 date_str)
1899 if match is not None:
1900 start_time = datetime_from_str(match.group('start'), precision, format)
1901 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1902 unit = match.group('unit')
1903 if unit == 'month' or unit == 'year':
1904 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1905 unit = 'day'
1906 else:
1907 if unit == 'week':
1908 unit = 'day'
1909 time *= 7
1910 delta = datetime.timedelta(**{unit + 's': time})
1911 new_date = start_time + delta
1912 if auto_precision:
1913 return datetime_round(new_date, unit)
1914 return new_date
1915
1916 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1917
1918
1919 def date_from_str(date_str, format='%Y%m%d', strict=False):
1920 R"""
1921 Return a date object from a string using datetime_from_str
1922
1923 @param strict Restrict allowed patterns to "YYYYMMDD" and
1924 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1925 """
1926 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1927 raise ValueError(f'Invalid date format "{date_str}"')
1928 return datetime_from_str(date_str, precision='microsecond', format=format).date()
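
# Editor's illustration (not in the original source):
#   date_from_str('today-1week')  # -> the date exactly one week ago (UTC)
#   date_from_str('20230115')  # -> datetime.date(2023, 1, 15)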
1929
1930
1931 def datetime_add_months(dt, months):
1932 """Increment/Decrement a datetime object by months."""
1933 month = dt.month + months - 1
1934 year = dt.year + month // 12
1935 month = month % 12 + 1
1936 day = min(dt.day, calendar.monthrange(year, month)[1])
1937 return dt.replace(year, month, day)
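
# Editor's illustration (not in the original source) - the day is clamped
# to the length of the target month:
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   # -> datetime.datetime(2020, 2, 29, 0, 0)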
1938
1939
1940 def datetime_round(dt, precision='day'):
1941 """
1942 Round a datetime object's time to a specific precision
1943 """
1944 if precision == 'microsecond':
1945 return dt
1946
1947 unit_seconds = {
1948 'day': 86400,
1949 'hour': 3600,
1950 'minute': 60,
1951 'second': 1,
1952 }
1953 roundto = lambda x, n: ((x + n / 2) // n) * n
1954 timestamp = calendar.timegm(dt.timetuple())
1955 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1956
1957
1958 def hyphenate_date(date_str):
1959 """
1960 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1961 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1962 if match is not None:
1963 return '-'.join(match.groups())
1964 else:
1965 return date_str
1966
1967
1968 class DateRange:
1969 """Represents a time interval between two dates"""
1970
1971 def __init__(self, start=None, end=None):
1972 """start and end must be strings in the format accepted by date"""
1973 if start is not None:
1974 self.start = date_from_str(start, strict=True)
1975 else:
1976 self.start = datetime.datetime.min.date()
1977 if end is not None:
1978 self.end = date_from_str(end, strict=True)
1979 else:
1980 self.end = datetime.datetime.max.date()
1981 if self.start > self.end:
1982 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
1983
1984 @classmethod
1985 def day(cls, day):
1986 """Returns a range that only contains the given day"""
1987 return cls(day, day)
1988
1989 def __contains__(self, date):
1990 """Check if the date is in the range"""
1991 if not isinstance(date, datetime.date):
1992 date = date_from_str(date)
1993 return self.start <= date <= self.end
1994
1995 def __str__(self):
1996 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1997
1998 def __eq__(self, other):
1999 return (isinstance(other, DateRange)
2000 and self.start == other.start and self.end == other.end)
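
# Editor's illustration (not in the original source):
#   '20230115' in DateRange('20230101', '20230131')  # -> True
#   '20230201' in DateRange('20230101', '20230131')  # -> False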
2001
2002
2003 def platform_name():
2004 """ Returns the platform name as a str """
2005 deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
2006 return platform.platform()
2007
2008
2009 @functools.cache
2010 def system_identifier():
2011 python_implementation = platform.python_implementation()
2012 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2013 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
2014 libc_ver = []
2015 with contextlib.suppress(OSError): # We may not have access to the executable
2016 libc_ver = platform.libc_ver()
2017
2018 return 'Python %s (%s %s %s) - %s (%s%s)' % (
2019 platform.python_version(),
2020 python_implementation,
2021 platform.machine(),
2022 platform.architecture()[0],
2023 platform.platform(),
2024 ssl.OPENSSL_VERSION,
2025 format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
2026 )
2027
2028
2029 @functools.cache
2030 def get_windows_version():
2031 ''' Get the Windows version. Returns () if not running on Windows '''
2032 if compat_os_name == 'nt':
2033 return version_tuple(platform.win32_ver()[1])
2034 else:
2035 return ()
2036
2037
2038 def write_string(s, out=None, encoding=None):
2039 assert isinstance(s, str)
2040 out = out or sys.stderr
2041
2042 if compat_os_name == 'nt' and supports_terminal_sequences(out):
2043 s = re.sub(r'([\r\n]+)', r' \1', s)
2044
2045 enc, buffer = None, out
2046 if 'b' in getattr(out, 'mode', ''):
2047 enc = encoding or preferredencoding()
2048 elif hasattr(out, 'buffer'):
2049 buffer = out.buffer
2050 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
2051
2052 buffer.write(s.encode(enc, 'ignore') if enc else s)
2053 out.flush()
2054
2055
2056 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
2057 from . import _IN_CLI
2058 if _IN_CLI:
2059 if msg in deprecation_warning._cache:
2060 return
2061 deprecation_warning._cache.add(msg)
2062 if printer:
2063 return printer(f'{msg}{bug_reports_message()}', **kwargs)
2064 return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
2065 else:
2066 import warnings
2067 warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
2068
2069
2070 deprecation_warning._cache = set()
2071
2072
2073 def bytes_to_intlist(bs):
2074 if not bs:
2075 return []
2076 if isinstance(bs[0], int): # Python 3
2077 return list(bs)
2078 else:
2079 return [ord(c) for c in bs]
2080
2081
2082 def intlist_to_bytes(xs):
2083 if not xs:
2084 return b''
2085 return struct.pack('%dB' % len(xs), *xs)
2086
2087
2088 class LockingUnsupportedError(OSError):
2089 msg = 'File locking is not supported'
2090
2091 def __init__(self):
2092 super().__init__(self.msg)
2093
2094
2095 # Cross-platform file locking
2096 if sys.platform == 'win32':
2097 import ctypes
2098 import ctypes.wintypes
2099 import msvcrt
2100
2101 class OVERLAPPED(ctypes.Structure):
2102 _fields_ = [
2103 ('Internal', ctypes.wintypes.LPVOID),
2104 ('InternalHigh', ctypes.wintypes.LPVOID),
2105 ('Offset', ctypes.wintypes.DWORD),
2106 ('OffsetHigh', ctypes.wintypes.DWORD),
2107 ('hEvent', ctypes.wintypes.HANDLE),
2108 ]
2109
2110 kernel32 = ctypes.windll.kernel32
2111 LockFileEx = kernel32.LockFileEx
2112 LockFileEx.argtypes = [
2113 ctypes.wintypes.HANDLE, # hFile
2114 ctypes.wintypes.DWORD, # dwFlags
2115 ctypes.wintypes.DWORD, # dwReserved
2116 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2117 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2118 ctypes.POINTER(OVERLAPPED) # Overlapped
2119 ]
2120 LockFileEx.restype = ctypes.wintypes.BOOL
2121 UnlockFileEx = kernel32.UnlockFileEx
2122 UnlockFileEx.argtypes = [
2123 ctypes.wintypes.HANDLE, # hFile
2124 ctypes.wintypes.DWORD, # dwReserved
2125 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2126 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2127 ctypes.POINTER(OVERLAPPED) # Overlapped
2128 ]
2129 UnlockFileEx.restype = ctypes.wintypes.BOOL
2130 whole_low = 0xffffffff
2131 whole_high = 0x7fffffff
2132
2133 def _lock_file(f, exclusive, block):
2134 overlapped = OVERLAPPED()
2135 overlapped.Offset = 0
2136 overlapped.OffsetHigh = 0
2137 overlapped.hEvent = 0
2138 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2139
2140 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2141 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2142 0, whole_low, whole_high, f._lock_file_overlapped_p):
2143 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2144 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2145
2146 def _unlock_file(f):
2147 assert f._lock_file_overlapped_p
2148 handle = msvcrt.get_osfhandle(f.fileno())
2149 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2150 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2151
2152 else:
2153 try:
2154 import fcntl
2155
2156 def _lock_file(f, exclusive, block):
2157 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2158 if not block:
2159 flags |= fcntl.LOCK_NB
2160 try:
2161 fcntl.flock(f, flags)
2162 except BlockingIOError:
2163 raise
2164 except OSError: # AOSP does not have flock()
2165 fcntl.lockf(f, flags)
2166
2167 def _unlock_file(f):
2168 try:
2169 fcntl.flock(f, fcntl.LOCK_UN)
2170 except OSError:
2171 fcntl.lockf(f, fcntl.LOCK_UN)
2172
2173 except ImportError:
2174
2175 def _lock_file(f, exclusive, block):
2176 raise LockingUnsupportedError()
2177
2178 def _unlock_file(f):
2179 raise LockingUnsupportedError()
2180
2181
2182 class locked_file:
2183 locked = False
2184
2185 def __init__(self, filename, mode, block=True, encoding=None):
2186 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2187 raise NotImplementedError(mode)
2188 self.mode, self.block = mode, block
2189
2190 writable = any(f in mode for f in 'wax+')
2191 readable = any(f in mode for f in 'r+')
2192 flags = functools.reduce(operator.ior, (
2193 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2194 getattr(os, 'O_BINARY', 0), # Windows only
2195 getattr(os, 'O_NOINHERIT', 0), # Windows only
2196 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2197 os.O_APPEND if 'a' in mode else 0,
2198 os.O_EXCL if 'x' in mode else 0,
2199 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2200 ))
2201
2202 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2203
2204 def __enter__(self):
2205 exclusive = 'r' not in self.mode
2206 try:
2207 _lock_file(self.f, exclusive, self.block)
2208 self.locked = True
2209 except OSError:
2210 self.f.close()
2211 raise
2212 if 'w' in self.mode:
2213 try:
2214 self.f.truncate()
2215 except OSError as e:
2216 if e.errno not in (
2217 errno.ESPIPE, # Illegal seek - expected for FIFO
2218 errno.EINVAL, # Invalid argument - expected for /dev/null
2219 ):
2220 raise
2221 return self
2222
2223 def unlock(self):
2224 if not self.locked:
2225 return
2226 try:
2227 _unlock_file(self.f)
2228 finally:
2229 self.locked = False
2230
2231 def __exit__(self, *_):
2232 try:
2233 self.unlock()
2234 finally:
2235 self.f.close()
2236
2237 open = __enter__
2238 close = __exit__
2239
2240 def __getattr__(self, attr):
2241 return getattr(self.f, attr)
2242
2243 def __iter__(self):
2244 return iter(self.f)
2245
2246
2247 @functools.cache
2248 def get_filesystem_encoding():
2249 encoding = sys.getfilesystemencoding()
2250 return encoding if encoding is not None else 'utf-8'
2251
2252
2253 def shell_quote(args):
2254 quoted_args = []
2255 encoding = get_filesystem_encoding()
2256 for a in args:
2257 if isinstance(a, bytes):
2258 # We may get a filename encoded with 'encodeFilename'
2259 a = a.decode(encoding)
2260 quoted_args.append(compat_shlex_quote(a))
2261 return ' '.join(quoted_args)
2262
2263
2264 def smuggle_url(url, data):
2265 """ Pass additional data in a URL for internal use. """
2266
2267 url, idata = unsmuggle_url(url, {})
2268 data.update(idata)
2269 sdata = urllib.parse.urlencode(
2270 {'__youtubedl_smuggle': json.dumps(data)})
2271 return url + '#' + sdata
2272
2273
2274 def unsmuggle_url(smug_url, default=None):
2275 if '#__youtubedl_smuggle' not in smug_url:
2276 return smug_url, default
2277 url, _, sdata = smug_url.rpartition('#')
2278 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2279 data = json.loads(jsond)
2280 return url, data
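
# Editor's illustration (not in the original source) - the two functions
# round-trip via a JSON payload in the URL fragment:
#   url = smuggle_url('https://example.com/video', {'referer': 'x'})
#   unsmuggle_url(url)  # -> ('https://example.com/video', {'referer': 'x'})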
2281
2282
2283 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2284 """ Formats numbers with decimal sufixes like K, M, etc """
2285 num, factor = float_or_none(num), float(factor)
2286 if num is None or num < 0:
2287 return None
2288 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2289 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2290 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2291 if factor == 1024:
2292 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2293 converted = num / (factor ** exponent)
2294 return fmt % (converted, suffix)
2295
2296
2297 def format_bytes(bytes):
2298 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
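
# Editor's illustration (not in the original source):
#   format_decimal_suffix(12345, '%d%s')  # -> '12k'
#   format_bytes(1536)  # -> '1.50KiB'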
2299
2300
2301 def lookup_unit_table(unit_table, s, strict=False):
2302 num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
2303 units_re = '|'.join(re.escape(u) for u in unit_table)
2304 m = (re.fullmatch if strict else re.match)(
2305 rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
2306 if not m:
2307 return None
2308
2309 num = float(m.group('num').replace(',', '.'))
2310 mult = unit_table[m.group('unit')]
2311 return round(num * mult)
2312
2313
2314 def parse_bytes(s):
2315 """Parse a string indicating a byte quantity into an integer"""
2316 return lookup_unit_table(
2317 {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])},
2318 s.upper(), strict=True)
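
# Editor's illustration (not in the original source; assumes the NUMBER_RE
# pattern defined earlier in this module):
#   parse_bytes('500K')  # -> 512000
#   parse_bytes('1.5M')  # -> 1572864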
2319
2320
2321 def parse_filesize(s):
2322 if s is None:
2323 return None
2324
2325 # The lower-case forms are of course incorrect and unofficial,
2326 # but we support those too
2327 _UNIT_TABLE = {
2328 'B': 1,
2329 'b': 1,
2330 'bytes': 1,
2331 'KiB': 1024,
2332 'KB': 1000,
2333 'kB': 1024,
2334 'Kb': 1000,
2335 'kb': 1000,
2336 'kilobytes': 1000,
2337 'kibibytes': 1024,
2338 'MiB': 1024 ** 2,
2339 'MB': 1000 ** 2,
2340 'mB': 1024 ** 2,
2341 'Mb': 1000 ** 2,
2342 'mb': 1000 ** 2,
2343 'megabytes': 1000 ** 2,
2344 'mebibytes': 1024 ** 2,
2345 'GiB': 1024 ** 3,
2346 'GB': 1000 ** 3,
2347 'gB': 1024 ** 3,
2348 'Gb': 1000 ** 3,
2349 'gb': 1000 ** 3,
2350 'gigabytes': 1000 ** 3,
2351 'gibibytes': 1024 ** 3,
2352 'TiB': 1024 ** 4,
2353 'TB': 1000 ** 4,
2354 'tB': 1024 ** 4,
2355 'Tb': 1000 ** 4,
2356 'tb': 1000 ** 4,
2357 'terabytes': 1000 ** 4,
2358 'tebibytes': 1024 ** 4,
2359 'PiB': 1024 ** 5,
2360 'PB': 1000 ** 5,
2361 'pB': 1024 ** 5,
2362 'Pb': 1000 ** 5,
2363 'pb': 1000 ** 5,
2364 'petabytes': 1000 ** 5,
2365 'pebibytes': 1024 ** 5,
2366 'EiB': 1024 ** 6,
2367 'EB': 1000 ** 6,
2368 'eB': 1024 ** 6,
2369 'Eb': 1000 ** 6,
2370 'eb': 1000 ** 6,
2371 'exabytes': 1000 ** 6,
2372 'exbibytes': 1024 ** 6,
2373 'ZiB': 1024 ** 7,
2374 'ZB': 1000 ** 7,
2375 'zB': 1024 ** 7,
2376 'Zb': 1000 ** 7,
2377 'zb': 1000 ** 7,
2378 'zettabytes': 1000 ** 7,
2379 'zebibytes': 1024 ** 7,
2380 'YiB': 1024 ** 8,
2381 'YB': 1000 ** 8,
2382 'yB': 1024 ** 8,
2383 'Yb': 1000 ** 8,
2384 'yb': 1000 ** 8,
2385 'yottabytes': 1000 ** 8,
2386 'yobibytes': 1024 ** 8,
2387 }
2388
2389 return lookup_unit_table(_UNIT_TABLE, s)
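
# Editor's illustration (not in the original source) - note the distinction
# between binary and decimal units:
#   parse_filesize('1.5 GiB')  # -> 1610612736
#   parse_filesize('500 KB')  # -> 500000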
2390
2391
2392 def parse_count(s):
2393 if s is None:
2394 return None
2395
2396 s = re.sub(r'^[^\d]+\s', '', s).strip()
2397
2398 if re.match(r'^[\d,.]+$', s):
2399 return str_to_int(s)
2400
2401 _UNIT_TABLE = {
2402 'k': 1000,
2403 'K': 1000,
2404 'm': 1000 ** 2,
2405 'M': 1000 ** 2,
2406 'kk': 1000 ** 2,
2407 'KK': 1000 ** 2,
2408 'b': 1000 ** 3,
2409 'B': 1000 ** 3,
2410 }
2411
2412 ret = lookup_unit_table(_UNIT_TABLE, s)
2413 if ret is not None:
2414 return ret
2415
2416 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2417 if mobj:
2418 return str_to_int(mobj.group(1))
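
# Editor's illustration (not in the original source):
#   parse_count('1.2M')  # -> 1200000
#   parse_count('10,500 views')  # -> 10500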
2419
2420
2421 def parse_resolution(s, *, lenient=False):
2422 if s is None:
2423 return {}
2424
2425 if lenient:
2426 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2427 else:
2428 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2429 if mobj:
2430 return {
2431 'width': int(mobj.group('w')),
2432 'height': int(mobj.group('h')),
2433 }
2434
2435 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2436 if mobj:
2437 return {'height': int(mobj.group(1))}
2438
2439 mobj = re.search(r'\b([48])[kK]\b', s)
2440 if mobj:
2441 return {'height': int(mobj.group(1)) * 540}
2442
2443 return {}
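
# Editor's illustration (not in the original source):
#   parse_resolution('1920x1080')  # -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')  # -> {'height': 720}
#   parse_resolution('4k')  # -> {'height': 2160}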
2444
2445
2446 def parse_bitrate(s):
2447 if not isinstance(s, str):
2448 return
2449 mobj = re.search(r'\b(\d+)\s*kbps', s)
2450 if mobj:
2451 return int(mobj.group(1))
2452
2453
2454 def month_by_name(name, lang='en'):
2455 """ Return the number of a month by (locale-independently) English name """
2456
2457 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2458
2459 try:
2460 return month_names.index(name) + 1
2461 except ValueError:
2462 return None
2463
2464
2465 def month_by_abbreviation(abbrev):
2466 """ Return the number of a month by (locale-independently) English
2467 abbreviations """
2468
2469 try:
2470 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2471 except ValueError:
2472 return None
2473
2474
2475 def fix_xml_ampersands(xml_str):
2476 """Replace all the '&' by '&amp;' in XML"""
2477 return re.sub(
2478 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2479 '&amp;',
2480 xml_str)
2481
2482
2483 def setproctitle(title):
2484 assert isinstance(title, str)
2485
2486 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2487 try:
2488 import ctypes
2489 except ImportError:
2490 return
2491
2492 try:
2493 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2494 except OSError:
2495 return
2496 except TypeError:
2497 # LoadLibrary in Windows Python 2.7.13 only expects
2498 # a bytestring, but since unicode_literals turns
2499 # every string into a unicode string, it fails.
2500 return
2501 title_bytes = title.encode()
2502 buf = ctypes.create_string_buffer(len(title_bytes))
2503 buf.value = title_bytes
2504 try:
2505 libc.prctl(15, buf, 0, 0, 0)
2506 except AttributeError:
2507 return # Strange libc, just skip this
2508
2509
2510 def remove_start(s, start):
2511 return s[len(start):] if s is not None and s.startswith(start) else s
2512
2513
2514 def remove_end(s, end):
2515 return s[:-len(end)] if s is not None and s.endswith(end) else s
2516
2517
2518 def remove_quotes(s):
2519 if s is None or len(s) < 2:
2520 return s
2521 for quote in ('"', "'", ):
2522 if s[0] == quote and s[-1] == quote:
2523 return s[1:-1]
2524 return s
2525
2526
2527 def get_domain(url):
2528 """
2529 This implementation is inconsistent, but is kept for compatibility.
2530 Use this only for "webpage_url_domain"
2531 """
2532 return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2533
2534
2535 def url_basename(url):
2536 path = urllib.parse.urlparse(url).path
2537 return path.strip('/').split('/')[-1]
2538
2539
2540 def base_url(url):
2541 return re.match(r'https?://[^?#]+/', url).group()
2542
2543
2544 def urljoin(base, path):
2545 if isinstance(path, bytes):
2546 path = path.decode()
2547 if not isinstance(path, str) or not path:
2548 return None
2549 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2550 return path
2551 if isinstance(base, bytes):
2552 base = base.decode()
2553 if not isinstance(base, str) or not re.match(
2554 r'^(?:https?:)?//', base):
2555 return None
2556 return urllib.parse.urljoin(base, path)
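
# Editor's illustration (not in the original source):
#   urljoin('https://example.com/a/', 'b.mp4')  # -> 'https://example.com/a/b.mp4'
#   urljoin('https://example.com/a/', '//cdn.example.com/b.mp4')  # -> '//cdn.example.com/b.mp4'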
2557
2558
2559 class HEADRequest(urllib.request.Request):
2560 def get_method(self):
2561 return 'HEAD'
2562
2563
2564 class PUTRequest(urllib.request.Request):
2565 def get_method(self):
2566 return 'PUT'
2567
2568
2569 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2570 if get_attr and v is not None:
2571 v = getattr(v, get_attr, None)
2572 try:
2573 return int(v) * invscale // scale
2574 except (ValueError, TypeError, OverflowError):
2575 return default
2576
2577
2578 def str_or_none(v, default=None):
2579 return default if v is None else str(v)
2580
2581
2582 def str_to_int(int_str):
2583 """ A more relaxed version of int_or_none """
2584 if isinstance(int_str, int):
2585 return int_str
2586 elif isinstance(int_str, str):
2587 int_str = re.sub(r'[,\.\+]', '', int_str)
2588 return int_or_none(int_str)
2589
2590
2591 def float_or_none(v, scale=1, invscale=1, default=None):
2592 if v is None:
2593 return default
2594 try:
2595 return float(v) * invscale / scale
2596 except (ValueError, TypeError):
2597 return default
2598
2599
2600 def bool_or_none(v, default=None):
2601 return v if isinstance(v, bool) else default
2602
2603
2604 def strip_or_none(v, default=None):
2605 return v.strip() if isinstance(v, str) else default
2606
2607
2608 def url_or_none(url):
2609 if not url or not isinstance(url, str):
2610 return None
2611 url = url.strip()
2612 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2613
2614
2615 def request_to_url(req):
2616 if isinstance(req, urllib.request.Request):
2617 return req.get_full_url()
2618 else:
2619 return req
2620
2621
2622 def strftime_or_none(timestamp, date_format, default=None):
2623 datetime_object = None
2624 try:
2625 if isinstance(timestamp, (int, float)): # unix timestamp
2626 # Using naive datetime here can break timestamp() in Windows
2627 # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
2628 datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
2629 elif isinstance(timestamp, str): # assume YYYYMMDD
2630 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2631 date_format = re.sub( # Support %s on windows
2632 r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
2633 return datetime_object.strftime(date_format)
2634 except (ValueError, TypeError, AttributeError):
2635 return default
2636
2637
2638 def parse_duration(s):
2639 if not isinstance(s, str):
2640 return None
2641 s = s.strip()
2642 if not s:
2643 return None
2644
2645 days, hours, mins, secs, ms = [None] * 5
2646 m = re.match(r'''(?x)
2647 (?P<before_secs>
2648 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2649 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2650 (?P<ms>[.:][0-9]+)?Z?$
2651 ''', s)
2652 if m:
2653 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2654 else:
2655 m = re.match(
2656 r'''(?ix)(?:P?
2657 (?:
2658 [0-9]+\s*y(?:ears?)?,?\s*
2659 )?
2660 (?:
2661 [0-9]+\s*m(?:onths?)?,?\s*
2662 )?
2663 (?:
2664 [0-9]+\s*w(?:eeks?)?,?\s*
2665 )?
2666 (?:
2667 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2668 )?
2669 T)?
2670 (?:
2671 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2672 )?
2673 (?:
2674 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2675 )?
2676 (?:
2677 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2678 )?Z?$''', s)
2679 if m:
2680 days, hours, mins, secs, ms = m.groups()
2681 else:
2682 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2683 if m:
2684 hours, mins = m.groups()
2685 else:
2686 return None
2687
2688 if ms:
2689 ms = ms.replace(':', '.')
2690 return sum(float(part or 0) * mult for part, mult in (
2691 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
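
# Editor's illustration (not in the original source):
#   parse_duration('1:23:45')  # -> 5025.0
#   parse_duration('2h 30m')  # -> 9000.0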
2692
2693
2694 def prepend_extension(filename, ext, expected_real_ext=None):
2695 name, real_ext = os.path.splitext(filename)
2696 return (
2697 f'{name}.{ext}{real_ext}'
2698 if not expected_real_ext or real_ext[1:] == expected_real_ext
2699 else f'{filename}.{ext}')
2700
2701
2702 def replace_extension(filename, ext, expected_real_ext=None):
2703 name, real_ext = os.path.splitext(filename)
2704 return '{}.{}'.format(
2705 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2706 ext)
2707
2708
2709 def check_executable(exe, args=[]):
2710 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2711 args can be a list of arguments for a short output (like -version) """
2712 try:
2713 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2714 except OSError:
2715 return False
2716 return exe
2717
2718
2719 def _get_exe_version_output(exe, args):
2720 try:
2721 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2722 # SIGTTOU if yt-dlp is run in the background.
2723 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2724 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2725 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2726 except OSError:
2727 return False
2728 return stdout
2729
2730
2731 def detect_exe_version(output, version_re=None, unrecognized='present'):
2732 assert isinstance(output, str)
2733 if version_re is None:
2734 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2735 m = re.search(version_re, output)
2736 if m:
2737 return m.group(1)
2738 else:
2739 return unrecognized
2740
2741
2742 def get_exe_version(exe, args=['--version'],
2743 version_re=None, unrecognized='present'):
2744 """ Returns the version of the specified executable,
2745 or False if the executable is not present """
2746 out = _get_exe_version_output(exe, args)
2747 return detect_exe_version(out, version_re, unrecognized) if out else False
2748
2749
2750 def frange(start=0, stop=None, step=1):
2751 """Float range"""
2752 if stop is None:
2753 start, stop = 0, start
2754 sign = [-1, 1][step > 0] if step else 0
2755 while sign * start < sign * stop:
2756 yield start
2757 start += step
2758
2759
2760 class LazyList(collections.abc.Sequence):
2761 """Lazy immutable list from an iterable
2762 Note that slices of a LazyList are lists and not LazyList"""
2763
2764 class IndexError(IndexError):
2765 pass
2766
2767 def __init__(self, iterable, *, reverse=False, _cache=None):
2768 self._iterable = iter(iterable)
2769 self._cache = [] if _cache is None else _cache
2770 self._reversed = reverse
2771
2772 def __iter__(self):
2773 if self._reversed:
2774 # We need to consume the entire iterable to iterate in reverse
2775 yield from self.exhaust()
2776 return
2777 yield from self._cache
2778 for item in self._iterable:
2779 self._cache.append(item)
2780 yield item
2781
2782 def _exhaust(self):
2783 self._cache.extend(self._iterable)
2784 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2785 return self._cache
2786
2787 def exhaust(self):
2788 """Evaluate the entire iterable"""
2789 return self._exhaust()[::-1 if self._reversed else 1]
2790
2791 @staticmethod
2792 def _reverse_index(x):
2793 return None if x is None else ~x
2794
2795 def __getitem__(self, idx):
2796 if isinstance(idx, slice):
2797 if self._reversed:
2798 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2799 start, stop, step = idx.start, idx.stop, idx.step or 1
2800 elif isinstance(idx, int):
2801 if self._reversed:
2802 idx = self._reverse_index(idx)
2803 start, stop, step = idx, idx, 0
2804 else:
2805 raise TypeError('indices must be integers or slices')
2806 if ((start or 0) < 0 or (stop or 0) < 0
2807 or (start is None and step < 0)
2808 or (stop is None and step > 0)):
2809 # We need to consume the entire iterable to be able to slice from the end
2810 # Obviously, never use this with infinite iterables
2811 self._exhaust()
2812 try:
2813 return self._cache[idx]
2814 except IndexError as e:
2815 raise self.IndexError(e) from e
2816 n = max(start or 0, stop or 0) - len(self._cache) + 1
2817 if n > 0:
2818 self._cache.extend(itertools.islice(self._iterable, n))
2819 try:
2820 return self._cache[idx]
2821 except IndexError as e:
2822 raise self.IndexError(e) from e
2823
2824 def __bool__(self):
2825 try:
2826 self[-1] if self._reversed else self[0]
2827 except self.IndexError:
2828 return False
2829 return True
2830
2831 def __len__(self):
2832 self._exhaust()
2833 return len(self._cache)
2834
2835 def __reversed__(self):
2836 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2837
2838 def __copy__(self):
2839 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2840
2841 def __repr__(self):
2842 # repr and str should mimic a list. So we exhaust the iterable
2843 return repr(self.exhaust())
2844
2845 def __str__(self):
2846 return repr(self.exhaust())
2847
2848
2849 class PagedList:
2850
2851 class IndexError(IndexError):
2852 pass
2853
2854 def __len__(self):
2855 # This is only useful for tests
2856 return len(self.getslice())
2857
2858 def __init__(self, pagefunc, pagesize, use_cache=True):
2859 self._pagefunc = pagefunc
2860 self._pagesize = pagesize
2861 self._pagecount = float('inf')
2862 self._use_cache = use_cache
2863 self._cache = {}
2864
2865 def getpage(self, pagenum):
2866 page_results = self._cache.get(pagenum)
2867 if page_results is None:
2868 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2869 if self._use_cache:
2870 self._cache[pagenum] = page_results
2871 return page_results
2872
2873 def getslice(self, start=0, end=None):
2874 return list(self._getslice(start, end))
2875
2876 def _getslice(self, start, end):
2877 raise NotImplementedError('This method must be implemented by subclasses')
2878
2879 def __getitem__(self, idx):
2880 assert self._use_cache, 'Indexing PagedList requires cache'
2881 if not isinstance(idx, int) or idx < 0:
2882 raise TypeError('indices must be non-negative integers')
2883 entries = self.getslice(idx, idx + 1)
2884 if not entries:
2885 raise self.IndexError()
2886 return entries[0]
2887
2888
2889 class OnDemandPagedList(PagedList):
2890 """Download pages until a page with less than maximum results"""
2891
2892 def _getslice(self, start, end):
2893 for pagenum in itertools.count(start // self._pagesize):
2894 firstid = pagenum * self._pagesize
2895 nextfirstid = pagenum * self._pagesize + self._pagesize
2896 if start >= nextfirstid:
2897 continue
2898
2899 startv = (
2900 start % self._pagesize
2901 if firstid <= start < nextfirstid
2902 else 0)
2903 endv = (
2904 ((end - 1) % self._pagesize) + 1
2905 if (end is not None and firstid <= end <= nextfirstid)
2906 else None)
2907
2908 try:
2909 page_results = self.getpage(pagenum)
2910 except Exception:
2911 self._pagecount = pagenum - 1
2912 raise
2913 if startv != 0 or endv is not None:
2914 page_results = page_results[startv:endv]
2915 yield from page_results
2916
2917 # A little optimization - if the current page is not "full", i.e. does
2918 # not contain page_size videos, then we can assume that this page
2919 # is the last one - there are no more ids on further pages -
2920 # so there is no need to query again.
2921 if len(page_results) + startv < self._pagesize:
2922 break
2923
2924 # If we got the whole page, but the next page is not interesting,
2925 # break out early as well
2926 if end == nextfirstid:
2927 break
2928
2929
2930 class InAdvancePagedList(PagedList):
2931 """PagedList with total number of pages known in advance"""
2932
2933 def __init__(self, pagefunc, pagecount, pagesize):
2934 PagedList.__init__(self, pagefunc, pagesize, True)
2935 self._pagecount = pagecount
2936
2937 def _getslice(self, start, end):
2938 start_page = start // self._pagesize
2939 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2940 skip_elems = start - start_page * self._pagesize
2941 only_more = None if end is None else end - start
2942 for pagenum in range(start_page, end_page):
2943 page_results = self.getpage(pagenum)
2944 if skip_elems:
2945 page_results = page_results[skip_elems:]
2946 skip_elems = None
2947 if only_more is not None:
2948 if len(page_results) < only_more:
2949 only_more -= len(page_results)
2950 else:
2951 yield from page_results[:only_more]
2952 break
2953 yield from page_results
2954
2955
2956 class PlaylistEntries:
2957 MissingEntry = object()
2958 is_exhausted = False
2959
2960 def __init__(self, ydl, info_dict):
2961 self.ydl = ydl
2962
2963 # _entries must be assigned now since info_dict can change during iteration
2964 entries = info_dict.get('entries')
2965 if entries is None:
2966 raise EntryNotInPlaylist('There are no entries')
2967 elif isinstance(entries, list):
2968 self.is_exhausted = True
2969
2970 requested_entries = info_dict.get('requested_entries')
2971 self.is_incomplete = requested_entries is not None
2972 if self.is_incomplete:
2973 assert self.is_exhausted
2974 self._entries = [self.MissingEntry] * max(requested_entries or [0])
2975 for i, entry in zip(requested_entries, entries):
2976 self._entries[i - 1] = entry
2977 elif isinstance(entries, (list, PagedList, LazyList)):
2978 self._entries = entries
2979 else:
2980 self._entries = LazyList(entries)
2981
2982 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2983 (?P<start>[+-]?\d+)?
2984 (?P<range>[:-]
2985 (?P<end>[+-]?\d+|inf(?:inite)?)?
2986 (?::(?P<step>[+-]?\d+))?
2987 )?''')
2988
2989 @classmethod
2990 def parse_playlist_items(cls, string):
2991 for segment in string.split(','):
2992 if not segment:
2993 raise ValueError('There are two or more consecutive commas')
2994 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2995 if not mobj:
2996 raise ValueError(f'{segment!r} is not a valid specification')
2997 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2998 if int_or_none(step) == 0:
2999 raise ValueError(f'Step in {segment!r} cannot be zero')
3000 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
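
# Editor's illustration (not in the original source):
#   list(PlaylistEntries.parse_playlist_items('1,3:5'))
#   # -> [1, slice(3, 5.0, None)]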
3001
3002 def get_requested_items(self):
3003 playlist_items = self.ydl.params.get('playlist_items')
3004 playlist_start = self.ydl.params.get('playliststart', 1)
3005 playlist_end = self.ydl.params.get('playlistend')
3006 # For backwards compatibility, interpret -1 as whole list
3007 if playlist_end in (-1, None):
3008 playlist_end = ''
3009 if not playlist_items:
3010 playlist_items = f'{playlist_start}:{playlist_end}'
3011 elif playlist_start != 1 or playlist_end:
3012 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
3013
3014 for index in self.parse_playlist_items(playlist_items):
3015 for i, entry in self[index]:
3016 yield i, entry
3017 if not entry:
3018 continue
3019 try:
3020 # TODO: Add auto-generated fields
3021 self.ydl._match_entry(entry, incomplete=True, silent=True)
3022 except (ExistingVideoReached, RejectedVideoReached):
3023 return
3024
3025 def get_full_count(self):
3026 if self.is_exhausted and not self.is_incomplete:
3027 return len(self)
3028 elif isinstance(self._entries, InAdvancePagedList):
3029 if self._entries._pagesize == 1:
3030 return self._entries._pagecount
3031
3032 @functools.cached_property
3033 def _getter(self):
3034 if isinstance(self._entries, list):
3035 def get_entry(i):
3036 try:
3037 entry = self._entries[i]
3038 except IndexError:
3039 entry = self.MissingEntry
3040 if not self.is_incomplete:
3041 raise self.IndexError()
3042 if entry is self.MissingEntry:
3043 raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
3044 return entry
3045 else:
3046 def get_entry(i):
3047 try:
3048 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
3049 except (LazyList.IndexError, PagedList.IndexError):
3050 raise self.IndexError()
3051 return get_entry
3052
3053 def __getitem__(self, idx):
3054 if isinstance(idx, int):
3055 idx = slice(idx, idx)
3056
3057 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3058 step = 1 if idx.step is None else idx.step
3059 if idx.start is None:
3060 start = 0 if step > 0 else len(self) - 1
3061 else:
3062 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
3063
3064 # NB: Do not call len(self) when idx == [:]
3065 if idx.stop is None:
3066 stop = 0 if step < 0 else float('inf')
3067 else:
3068 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
3069 stop += [-1, 1][step > 0]
3070
3071 for i in frange(start, stop, step):
3072 if i < 0:
3073 continue
3074 try:
3075 entry = self._getter(i)
3076 except self.IndexError:
3077 self.is_exhausted = True
3078 if step > 0:
3079 break
3080 continue
3081 yield i + 1, entry
3082
3083 def __len__(self):
3084 return len(tuple(self[:]))
3085
3086 class IndexError(IndexError):
3087 pass
3088
3089
3090 def uppercase_escape(s):
3091 unicode_escape = codecs.getdecoder('unicode_escape')
3092 return re.sub(
3093 r'\\U[0-9a-fA-F]{8}',
3094 lambda m: unicode_escape(m.group(0))[0],
3095 s)
3096
3097
3098 def lowercase_escape(s):
3099 unicode_escape = codecs.getdecoder('unicode_escape')
3100 return re.sub(
3101 r'\\u[0-9a-fA-F]{4}',
3102 lambda m: unicode_escape(m.group(0))[0],
3103 s)
3104
3105
3106 def escape_rfc3986(s):
3107 """Escape non-ASCII characters as suggested by RFC 3986"""
3108 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3109
3110
3111 def escape_url(url):
3112 """Escape URL as suggested by RFC 3986"""
3113 url_parsed = urllib.parse.urlparse(url)
3114 return url_parsed._replace(
3115 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3116 path=escape_rfc3986(url_parsed.path),
3117 params=escape_rfc3986(url_parsed.params),
3118 query=escape_rfc3986(url_parsed.query),
3119 fragment=escape_rfc3986(url_parsed.fragment)
3120 ).geturl()
3121
3122
3123 def parse_qs(url, **kwargs):
3124 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
3125
3126
3127 def read_batch_urls(batch_fd):
3128 def fixup(url):
3129 if not isinstance(url, str):
3130 url = url.decode('utf-8', 'replace')
3131 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3132 for bom in BOM_UTF8:
3133 if url.startswith(bom):
3134 url = url[len(bom):]
3135 url = url.lstrip()
3136 if not url or url.startswith(('#', ';', ']')):
3137 return False
3138 # "#" cannot be stripped out since it is part of the URI
3139 # However, it can be safely stripped out if following a whitespace
3140 return re.split(r'\s#', url, 1)[0].rstrip()
3141
3142 with contextlib.closing(batch_fd) as fd:
3143 return [url for url in map(fixup, fd) if url]
3144
3145
3146 def urlencode_postdata(*args, **kargs):
3147 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3148
3149
3150 def update_url_query(url, query):
3151 if not query:
3152 return url
3153 parsed_url = urllib.parse.urlparse(url)
3154 qs = urllib.parse.parse_qs(parsed_url.query)
3155 qs.update(query)
3156 return urllib.parse.urlunparse(parsed_url._replace(
3157 query=urllib.parse.urlencode(qs, True)))
3158
3159
3160 def update_Request(req, url=None, data=None, headers=None, query=None):
3161 req_headers = req.headers.copy()
3162 req_headers.update(headers or {})
3163 req_data = data or req.data
3164 req_url = update_url_query(url or req.get_full_url(), query)
3165 req_get_method = req.get_method()
3166 if req_get_method == 'HEAD':
3167 req_type = HEADRequest
3168 elif req_get_method == 'PUT':
3169 req_type = PUTRequest
3170 else:
3171 req_type = urllib.request.Request
3172 new_req = req_type(
3173 req_url, data=req_data, headers=req_headers,
3174 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3175 if hasattr(req, 'timeout'):
3176 new_req.timeout = req.timeout
3177 return new_req
3178
3179
3180 def _multipart_encode_impl(data, boundary):
3181 content_type = 'multipart/form-data; boundary=%s' % boundary
3182
3183 out = b''
3184 for k, v in data.items():
3185 out += b'--' + boundary.encode('ascii') + b'\r\n'
3186 if isinstance(k, str):
3187 k = k.encode()
3188 if isinstance(v, str):
3189 v = v.encode()
3190 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3191 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3192 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3193 if boundary.encode('ascii') in content:
3194 raise ValueError('Boundary overlaps with data')
3195 out += content
3196
3197 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3198
3199 return out, content_type
3200
3201
3202 def multipart_encode(data, boundary=None):
3203 '''
3204 Encode a dict to RFC 7578-compliant form-data
3205
3206 data:
3207 A dict where keys and values can be either Unicode or bytes-like
3208 objects.
3209 boundary:
3210 If specified, it must be a Unicode object and is used as the boundary.
3211 Otherwise a random boundary is generated.
3212
3213 Reference: https://tools.ietf.org/html/rfc7578
3214 '''
3215 has_specified_boundary = boundary is not None
3216
3217 while True:
3218 if boundary is None:
3219 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3220
3221 try:
3222 out, content_type = _multipart_encode_impl(data, boundary)
3223 break
3224 except ValueError:
3225 if has_specified_boundary:
3226 raise
3227 boundary = None
3228
3229 return out, content_type
3230
3231
3232 def variadic(x, allowed_types=(str, bytes, dict)):
3233 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
3234
3235
3236 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3237 for val in map(d.get, variadic(key_or_keys)):
3238 if val is not None and (val or not skip_false_values):
3239 return val
3240 return default
3241
3242
3243 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3244 for f in funcs:
3245 try:
3246 val = f(*args, **kwargs)
3247 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3248 pass
3249 else:
3250 if expected_type is None or isinstance(val, expected_type):
3251 return val
3252
3253
3254 def try_get(src, getter, expected_type=None):
3255 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
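
# Editor's illustration (not in the original source):
#   try_call(lambda: 1 // 0, lambda: 42)  # -> 42
#   try_get({'a': [1, 2]}, lambda x: x['a'][0], int)  # -> 1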
3256
3257
3258 def filter_dict(dct, cndn=lambda _, v: v is not None):
3259 return {k: v for k, v in dct.items() if cndn(k, v)}
3260
3261
3262 def merge_dicts(*dicts):
3263 merged = {}
3264 for a_dict in dicts:
3265 for k, v in a_dict.items():
3266 if (v is not None and k not in merged
3267 or isinstance(v, str) and merged[k] == ''):
3268 merged[k] = v
3269 return merged
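
# Editor's illustration (not in the original source) - earlier dicts take
# precedence, except that empty strings can be overwritten:
#   merge_dicts({'title': 'A', 'id': None}, {'id': '123', 'title': 'B'})
#   # -> {'title': 'A', 'id': '123'}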
3270
3271
3272 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3273 return string if isinstance(string, str) else str(string, encoding, errors)
3274
3275
3276 US_RATINGS = {
3277 'G': 0,
3278 'PG': 10,
3279 'PG-13': 13,
3280 'R': 16,
3281 'NC': 18,
3282 }
3283
3284
3285 TV_PARENTAL_GUIDELINES = {
3286 'TV-Y': 0,
3287 'TV-Y7': 7,
3288 'TV-G': 0,
3289 'TV-PG': 0,
3290 'TV-14': 14,
3291 'TV-MA': 17,
3292 }
3293
3294
3295 def parse_age_limit(s):
3296 # isinstance(False, int) is True. So type() must be used instead
3297 if type(s) is int: # noqa: E721
3298 return s if 0 <= s <= 21 else None
3299 elif not isinstance(s, str):
3300 return None
3301 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3302 if m:
3303 return int(m.group('age'))
3304 s = s.upper()
3305 if s in US_RATINGS:
3306 return US_RATINGS[s]
3307 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3308 if m:
3309 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3310 return None
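
# Editor's illustration (not in the original source):
#   parse_age_limit('PG-13')  # -> 13
#   parse_age_limit('TV-MA')  # -> 17
#   parse_age_limit('18+')  # -> 18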
3311
3312
3313 def strip_jsonp(code):
3314 return re.sub(
3315 r'''(?sx)^
3316 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3317 (?:\s*&&\s*(?P=func_name))?
3318 \s*\(\s*(?P<callback_data>.*)\);?
3319 \s*?(?://[^\n]*)*$''',
3320 r'\g<callback_data>', code)
3321
3322
3323 def js_to_json(code, vars={}, *, strict=False):
3324 # vars is a dict of var, val pairs to substitute
3325 STRING_QUOTES = '\'"'
3326 STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
3327 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3328 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3329 INTEGER_TABLE = (
3330 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3331 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3332 )
3333
3334 def process_escape(match):
3335 JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
3336 escape = match.group(1) or match.group(2)
3337
3338 return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
3339 else R'\u00' if escape == 'x'
3340 else '' if escape == '\n'
3341 else escape)
3342
3343 def fix_kv(m):
3344 v = m.group(0)
3345 if v in ('true', 'false', 'null'):
3346 return v
3347 elif v in ('undefined', 'void 0'):
3348 return 'null'
3349 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3350 return ''
3351
3352 if v[0] in STRING_QUOTES:
3353 escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
3354 return f'"{escaped}"'
3355
3356 for regex, base in INTEGER_TABLE:
3357 im = re.match(regex, v)
3358 if im:
3359 i = int(im.group(1), base)
3360 return f'"{i}":' if v.endswith(':') else str(i)
3361
3362 if v in vars:
3363 return json.dumps(vars[v])
3364
3365 if not strict:
3366 return f'"{v}"'
3367
3368 raise ValueError(f'Unknown value: {v}')
3369
3370 def create_map(mobj):
3371 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3372
3373 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3374 if not strict:
3375 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3376 code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
3377
3378 return re.sub(rf'''(?sx)
3379 {STRING_RE}|
3380 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
3381 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3382 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
3383 [0-9]+(?={SKIP_RE}:)|
3384 !+
3385 ''', fix_kv, code)
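
# Editor's illustration (not in the original source):
#   js_to_json("{a: 'b', c: 0x1a}")  # -> '{"a": "b", "c": 26}'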
3386
3387
3388 def qualities(quality_ids):
3389 """ Get a numeric quality value out of a list of possible values """
3390 def q(qid):
3391 try:
3392 return quality_ids.index(qid)
3393 except ValueError:
3394 return -1
3395 return q
3396
3397
3398 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3399
3400
3401 DEFAULT_OUTTMPL = {
3402 'default': '%(title)s [%(id)s].%(ext)s',
3403 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3404 }
3405 OUTTMPL_TYPES = {
3406 'chapter': None,
3407 'subtitle': None,
3408 'thumbnail': None,
3409 'description': 'description',
3410 'annotation': 'annotations.xml',
3411 'infojson': 'info.json',
3412 'link': None,
3413 'pl_video': None,
3414 'pl_thumbnail': None,
3415 'pl_description': 'description',
3416 'pl_infojson': 'info.json',
3417 }
3418
3419 # As of [1], the format syntax is:
3420 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3421 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3422 STR_FORMAT_RE_TMPL = r'''(?x)
3423 (?<!%)(?P<prefix>(?:%%)*)
3424 %
3425 (?P<has_key>\((?P<key>{0})\))?
3426 (?P<format>
3427 (?P<conversion>[#0\-+ ]+)?
3428 (?P<min_width>\d+)?
3429 (?P<precision>\.\d+)?
3430 (?P<len_mod>[hlL])? # unused in python
3431 {1} # conversion type
3432 )
3433 '''
3434
3435
3436 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3437
3438
3439 def limit_length(s, length):
3440 """ Add ellipses to overly long strings """
3441 if s is None:
3442 return None
3443 ELLIPSES = '...'
3444 if len(s) > length:
3445 return s[:length - len(ELLIPSES)] + ELLIPSES
3446 return s
3447
3448
3449 def version_tuple(v):
3450 return tuple(int(e) for e in re.split(r'[-.]', v))
3451
3452
3453 def is_outdated_version(version, limit, assume_new=True):
3454 if not version:
3455 return not assume_new
3456 try:
3457 return version_tuple(version) < version_tuple(limit)
3458 except ValueError:
3459 return not assume_new
3460
3461
3462 def ytdl_is_updateable():
3463 """ Returns if yt-dlp can be updated with -U """
3464
3465 from .update import is_non_updateable
3466
3467 return not is_non_updateable()
3468
3469
3470 def args_to_str(args):
3471 # Get a short string representation for a subprocess command
3472 return ' '.join(compat_shlex_quote(a) for a in args)
3473
3474
3475 def error_to_compat_str(err):
3476 return str(err)
3477
3478
3479 def error_to_str(err):
3480 return f'{type(err).__name__}: {err}'
3481
3482
3483 def mimetype2ext(mt):
3484 if mt is None:
3485 return None
3486
3487 mt, _, params = mt.partition(';')
3488 mt = mt.strip()
3489
3490 FULL_MAP = {
3491 'audio/mp4': 'm4a',
3492 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3493 # since it is the most popular one
3494 'audio/mpeg': 'mp3',
3495 'audio/x-wav': 'wav',
3496 'audio/wav': 'wav',
3497 'audio/wave': 'wav',
3498 }
3499
3500 ext = FULL_MAP.get(mt)
3501 if ext is not None:
3502 return ext
3503
3504 SUBTYPE_MAP = {
3505 '3gpp': '3gp',
3506 'smptett+xml': 'tt',
3507 'ttaf+xml': 'dfxp',
3508 'ttml+xml': 'ttml',
3509 'x-flv': 'flv',
3510 'x-mp4-fragmented': 'mp4',
3511 'x-ms-sami': 'sami',
3512 'x-ms-wmv': 'wmv',
3513 'mpegurl': 'm3u8',
3514 'x-mpegurl': 'm3u8',
3515 'vnd.apple.mpegurl': 'm3u8',
3516 'dash+xml': 'mpd',
3517 'f4m+xml': 'f4m',
3518 'hds+xml': 'f4m',
3519 'vnd.ms-sstr+xml': 'ism',
3520 'quicktime': 'mov',
3521 'mp2t': 'ts',
3522 'x-wav': 'wav',
3523 'filmstrip+json': 'fs',
3524 'svg+xml': 'svg',
3525 }
3526
3527 _, _, subtype = mt.rpartition('/')
3528 ext = SUBTYPE_MAP.get(subtype.lower())
3529 if ext is not None:
3530 return ext
3531
3532 SUFFIX_MAP = {
3533 'json': 'json',
3534 'xml': 'xml',
3535 'zip': 'zip',
3536 'gzip': 'gz',
3537 }
3538
3539 _, _, suffix = subtype.partition('+')
3540 ext = SUFFIX_MAP.get(suffix)
3541 if ext is not None:
3542 return ext
3543
3544 return subtype.replace('+', '.')
3545
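# Usage sketch (illustrative) of the three lookup tiers above, assuming these
# example MIME types:
#   mimetype2ext('audio/mp4')                            # FULL_MAP    -> 'm4a'
#   mimetype2ext('application/dash+xml; charset=utf-8')  # SUBTYPE_MAP -> 'mpd'
#   mimetype2ext('application/ld+json')                  # SUFFIX_MAP  -> 'json'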
3546
3547 def ext2mimetype(ext_or_url):
3548 if not ext_or_url:
3549 return None
3550 if '.' not in ext_or_url:
3551 ext_or_url = f'file.{ext_or_url}'
3552 return mimetypes.guess_type(ext_or_url)[0]
3553
3554
3555 def parse_codecs(codecs_str):
3556 # http://tools.ietf.org/html/rfc6381
3557 if not codecs_str:
3558 return {}
3559 split_codecs = list(filter(None, map(
3560 str.strip, codecs_str.strip().strip(',').split(','))))
3561 vcodec, acodec, scodec, hdr = None, None, None, None
3562 for full_codec in split_codecs:
3563 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3564 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3565 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3566 if vcodec:
3567 continue
3568 vcodec = full_codec
3569 if parts[0] in ('dvh1', 'dvhe'):
3570 hdr = 'DV'
3571 elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
3572 hdr = 'HDR10'
3573 elif parts[:2] == ['vp9', '2']:
3574 hdr = 'HDR10'
3575 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
3576 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3577 acodec = acodec or full_codec
3578 elif parts[0] in ('stpp', 'wvtt'):
3579 scodec = scodec or full_codec
3580 else:
3581 write_string(f'WARNING: Unknown codec {full_codec}\n')
3582 if vcodec or acodec or scodec:
3583 return {
3584 'vcodec': vcodec or 'none',
3585 'acodec': acodec or 'none',
3586 'dynamic_range': hdr,
3587 **({'scodec': scodec} if scodec is not None else {}),
3588 }
3589 elif len(split_codecs) == 2:
3590 return {
3591 'vcodec': split_codecs[0],
3592 'acodec': split_codecs[1],
3593 }
3594 return {}
3595
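# Usage sketch (illustrative), assuming typical RFC 6381 codec strings:
#   parse_codecs('avc1.640028, mp4a.40.2')
#       # -> {'vcodec': 'avc1.640028', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
#   parse_codecs('vp9.2')  # VP9 profile 2 is flagged as HDR10
#       # -> {'vcodec': 'vp9.2', 'acodec': 'none', 'dynamic_range': 'HDR10'}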
3596
3597 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3598 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3599
3600 allow_mkv = not preferences or 'mkv' in preferences
3601
3602 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3603 return 'mkv' # TODO: any other format allows this?
3604
3605 # TODO: Not all codecs supported by parse_codecs are handled here
3606 COMPATIBLE_CODECS = {
3607 'mp4': {
3608 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
3609 'h264', 'aacl', 'ec-3', # Set in ISM
3610 },
3611 'webm': {
3612 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3613 'vp9x', 'vp8x', # in the webm spec
3614 },
3615 }
3616
3617 sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3618 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3619
3620 for ext in preferences or COMPATIBLE_CODECS.keys():
3621 codec_set = COMPATIBLE_CODECS.get(ext, set())
3622 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3623 return ext
3624
3625 COMPATIBLE_EXTS = (
3626 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3627 {'webm'},
3628 )
3629 for ext in preferences or vexts:
3630 current_exts = {ext, *vexts, *aexts}
3631 if ext == 'mkv' or current_exts == {ext} or any(
3632 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3633 return ext
3634 return 'mkv' if allow_mkv else preferences[-1]
3635
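# Usage sketch (illustrative): with one video and one audio stream whose
# sanitized codecs are both listed under 'mp4' above,
#   get_compatible_ext(vcodecs=['avc1.640028'], acodecs=['mp4a.40.2'],
#                      vexts=['mp4'], aexts=['m4a'])  # -> 'mp4'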
3636
3637 def urlhandle_detect_ext(url_handle):
3638 getheader = url_handle.headers.get
3639
3640 cd = getheader('Content-Disposition')
3641 if cd:
3642 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3643 if m:
3644 e = determine_ext(m.group('filename'), default_ext=None)
3645 if e:
3646 return e
3647
3648 return mimetype2ext(getheader('Content-Type'))
3649
3650
3651 def encode_data_uri(data, mime_type):
3652 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3653
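# Usage sketch (illustrative):
#   encode_data_uri(b'hi', 'text/plain')  # -> 'data:text/plain;base64,aGk='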
3654
3655 def age_restricted(content_limit, age_limit):
3656 """ Returns True iff the content should be blocked """
3657
3658 if age_limit is None: # No limit set
3659 return False
3660 if content_limit is None:
3661 return False # Content available for everyone
3662 return age_limit < content_limit
3663
3664
3665 # List of known byte-order-marks (BOM)
3666 BOMS = [
3667 (b'\xef\xbb\xbf', 'utf-8'),
3668 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3669 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3670 (b'\xff\xfe', 'utf-16-le'),
3671 (b'\xfe\xff', 'utf-16-be'),
3672 ]
3673
3674
3675 def is_html(first_bytes):
3676 """ Detect whether a file contains HTML by examining its first bytes. """
3677
3678 encoding = 'utf-8'
3679 for bom, enc in BOMS:
3680 while first_bytes.startswith(bom):
3681 encoding, first_bytes = enc, first_bytes[len(bom):]
3682
3683 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3684
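# Usage sketch (illustrative): the return value is a truthy re.Match (or
# None), not a strict bool:
#   bool(is_html(b'\xef\xbb\xbf  <!DOCTYPE html>'))  # -> True (BOM stripped)
#   bool(is_html(b'{"json": true}'))                 # -> False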
3685
3686 def determine_protocol(info_dict):
3687 protocol = info_dict.get('protocol')
3688 if protocol is not None:
3689 return protocol
3690
3691 url = sanitize_url(info_dict['url'])
3692 if url.startswith('rtmp'):
3693 return 'rtmp'
3694 elif url.startswith('mms'):
3695 return 'mms'
3696 elif url.startswith('rtsp'):
3697 return 'rtsp'
3698
3699 ext = determine_ext(url)
3700 if ext == 'm3u8':
3701 return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
3702 elif ext == 'f4m':
3703 return 'f4m'
3704
3705 return urllib.parse.urlparse(url).scheme
3706
3707
3708 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3709 """ Render a list of rows, each as a list of values.
3710 Text after a \t will be right aligned """
3711 def width(string):
3712 return len(remove_terminal_sequences(string).replace('\t', ''))
3713
3714 def get_max_lens(table):
3715 return [max(width(str(v)) for v in col) for col in zip(*table)]
3716
3717 def filter_using_list(row, filterArray):
3718 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3719
3720 max_lens = get_max_lens(data) if hide_empty else []
3721 header_row = filter_using_list(header_row, max_lens)
3722 data = [filter_using_list(row, max_lens) for row in data]
3723
3724 table = [header_row] + data
3725 max_lens = get_max_lens(table)
3726 extra_gap += 1
3727 if delim:
3728 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3729 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3730 for row in table:
3731 for pos, text in enumerate(map(str, row)):
3732 if '\t' in text:
3733 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3734 else:
3735 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3736 ret = '\n'.join(''.join(row).rstrip() for row in table)
3737 return ret
3738
3739
3740 def _match_one(filter_part, dct, incomplete):
3741 # TODO: Generalize code with YoutubeDL._build_format_filter
3742 STRING_OPERATORS = {
3743 '*=': operator.contains,
3744 '^=': lambda attr, value: attr.startswith(value),
3745 '$=': lambda attr, value: attr.endswith(value),
3746 '~=': lambda attr, value: re.search(value, attr),
3747 }
3748 COMPARISON_OPERATORS = {
3749 **STRING_OPERATORS,
3750 '<=': operator.le, # "<=" must be defined above "<"
3751 '<': operator.lt,
3752 '>=': operator.ge,
3753 '>': operator.gt,
3754 '=': operator.eq,
3755 }
3756
3757 if isinstance(incomplete, bool):
3758 is_incomplete = lambda _: incomplete
3759 else:
3760 is_incomplete = lambda k: k in incomplete
3761
3762 operator_rex = re.compile(r'''(?x)
3763 (?P<key>[a-z_]+)
3764 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3765 (?:
3766 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3767 (?P<strval>.+?)
3768 )
3769 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3770 m = operator_rex.fullmatch(filter_part.strip())
3771 if m:
3772 m = m.groupdict()
3773 unnegated_op = COMPARISON_OPERATORS[m['op']]
3774 if m['negation']:
3775 op = lambda attr, value: not unnegated_op(attr, value)
3776 else:
3777 op = unnegated_op
3778 comparison_value = m['quotedstrval'] or m['strval']  # the regex above defines no 'intval' group
3779 if m['quote']:
3780 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3781 actual_value = dct.get(m['key'])
3782 numeric_comparison = None
3783 if isinstance(actual_value, (int, float)):
3784 # If the original field is a string and the matching comparison value is
3785 # a number, we should respect the origin of the original field
3786 # and process comparison value as a string (see
3787 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3788 try:
3789 numeric_comparison = int(comparison_value)
3790 except ValueError:
3791 numeric_comparison = parse_filesize(comparison_value)
3792 if numeric_comparison is None:
3793 numeric_comparison = parse_filesize(f'{comparison_value}B')
3794 if numeric_comparison is None:
3795 numeric_comparison = parse_duration(comparison_value)
3796 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3797 raise ValueError('Operator %s only supports string values!' % m['op'])
3798 if actual_value is None:
3799 return is_incomplete(m['key']) or m['none_inclusive']
3800 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3801
3802 UNARY_OPERATORS = {
3803 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3804 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3805 }
3806 operator_rex = re.compile(r'''(?x)
3807 (?P<op>%s)\s*(?P<key>[a-z_]+)
3808 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3809 m = operator_rex.fullmatch(filter_part.strip())
3810 if m:
3811 op = UNARY_OPERATORS[m.group('op')]
3812 actual_value = dct.get(m.group('key'))
3813 if is_incomplete(m.group('key')) and actual_value is None:
3814 return True
3815 return op(actual_value)
3816
3817 raise ValueError('Invalid filter part %r' % filter_part)
3818
3819
3820 def match_str(filter_str, dct, incomplete=False):
3821 """ Filter a dictionary with a simple string syntax.
3822 @returns Whether the filter passes
3823 @param incomplete Set of keys that are expected to be missing from dct.
3824 Can be True/False to indicate all/none of the keys may be missing.
3825 All conditions on incomplete keys pass if the key is missing
3826 """
3827 return all(
3828 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3829 for filter_part in re.split(r'(?<!\\)&', filter_str))
3830
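# Usage sketch (illustrative), assuming example info-dict fields:
#   match_str('duration > 30 & description', {'duration': 60, 'description': 'x'})
#       # -> True  (numeric comparison plus a unary "field is present" check)
#   match_str('!is_live', {'is_live': True})  # -> False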
3831
3832 def match_filter_func(filters):
3833 if not filters:
3834 return None
3835 filters = set(variadic(filters))
3836
3837 interactive = '-' in filters
3838 if interactive:
3839 filters.remove('-')
3840
3841 def _match_func(info_dict, incomplete=False):
3842 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3843 return NO_DEFAULT if interactive and not incomplete else None
3844 else:
3845 video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3846 filter_str = ') | ('.join(map(str.strip, filters))
3847 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3848 return _match_func
3849
3850
3851 class download_range_func:
3852 def __init__(self, chapters, ranges):
3853 self.chapters, self.ranges = chapters, ranges
3854
3855 def __call__(self, info_dict, ydl):
3856 if not self.ranges and not self.chapters:
3857 yield {}
3858
3859 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3860 else 'Cannot match chapters since chapter information is unavailable')
3861 for regex in self.chapters or []:
3862 for i, chapter in enumerate(info_dict.get('chapters') or []):
3863 if re.search(regex, chapter['title']):
3864 warning = None
3865 yield {**chapter, 'index': i}
3866 if self.chapters and warning:
3867 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3868
3869 yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3870
3871 def __eq__(self, other):
3872 return (isinstance(other, download_range_func)
3873 and self.chapters == other.chapters and self.ranges == other.ranges)
3874
3875 def __repr__(self):
3876 return f'{type(self).__name__}({self.chapters}, {self.ranges})'
3877
3878
3879 def parse_dfxp_time_expr(time_expr):
3880 if not time_expr:
3881 return
3882
3883 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3884 if mobj:
3885 return float(mobj.group('time_offset'))
3886
3887 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3888 if mobj:
3889 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3890
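# Usage sketch (illustrative): both DFXP clock and offset notations are
# accepted, returning seconds as a float (or None when unparsable):
#   parse_dfxp_time_expr('00:01:02.500')  # -> 62.5
#   parse_dfxp_time_expr('5.5s')          # -> 5.5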
3891
3892 def srt_subtitles_timecode(seconds):
3893 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3894
3895
3896 def ass_subtitles_timecode(seconds):
3897 time = timetuple_from_msec(seconds * 1000)
3898 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3899
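# Usage sketch (illustrative), relying on timetuple_from_msec() defined
# earlier in this module:
#   srt_subtitles_timecode(3661.5)  # -> '01:01:01,500'
#   ass_subtitles_timecode(3661.5)  # -> '1:01:01.50'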
3900
3901 def dfxp2srt(dfxp_data):
3902 '''
3903 @param dfxp_data A bytes-like object containing DFXP data
3904 @returns A unicode object containing converted SRT data
3905 '''
3906 LEGACY_NAMESPACES = (
3907 (b'http://www.w3.org/ns/ttml', [
3908 b'http://www.w3.org/2004/11/ttaf1',
3909 b'http://www.w3.org/2006/04/ttaf1',
3910 b'http://www.w3.org/2006/10/ttaf1',
3911 ]),
3912 (b'http://www.w3.org/ns/ttml#styling', [
3913 b'http://www.w3.org/ns/ttml#style',
3914 ]),
3915 )
3916
3917 SUPPORTED_STYLING = [
3918 'color',
3919 'fontFamily',
3920 'fontSize',
3921 'fontStyle',
3922 'fontWeight',
3923 'textDecoration'
3924 ]
3925
3926 _x = functools.partial(xpath_with_ns, ns_map={
3927 'xml': 'http://www.w3.org/XML/1998/namespace',
3928 'ttml': 'http://www.w3.org/ns/ttml',
3929 'tts': 'http://www.w3.org/ns/ttml#styling',
3930 })
3931
3932 styles = {}
3933 default_style = {}
3934
3935 class TTMLPElementParser:
3936 _out = ''
3937 _unclosed_elements = []
3938 _applied_styles = []
3939
3940 def start(self, tag, attrib):
3941 if tag in (_x('ttml:br'), 'br'):
3942 self._out += '\n'
3943 else:
3944 unclosed_elements = []
3945 style = {}
3946 element_style_id = attrib.get('style')
3947 if default_style:
3948 style.update(default_style)
3949 if element_style_id:
3950 style.update(styles.get(element_style_id, {}))
3951 for prop in SUPPORTED_STYLING:
3952 prop_val = attrib.get(_x('tts:' + prop))
3953 if prop_val:
3954 style[prop] = prop_val
3955 if style:
3956 font = ''
3957 for k, v in sorted(style.items()):
3958 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3959 continue
3960 if k == 'color':
3961 font += ' color="%s"' % v
3962 elif k == 'fontSize':
3963 font += ' size="%s"' % v
3964 elif k == 'fontFamily':
3965 font += ' face="%s"' % v
3966 elif k == 'fontWeight' and v == 'bold':
3967 self._out += '<b>'
3968 unclosed_elements.append('b')
3969 elif k == 'fontStyle' and v == 'italic':
3970 self._out += '<i>'
3971 unclosed_elements.append('i')
3972 elif k == 'textDecoration' and v == 'underline':
3973 self._out += '<u>'
3974 unclosed_elements.append('u')
3975 if font:
3976 self._out += '<font' + font + '>'
3977 unclosed_elements.append('font')
3978 applied_style = {}
3979 if self._applied_styles:
3980 applied_style.update(self._applied_styles[-1])
3981 applied_style.update(style)
3982 self._applied_styles.append(applied_style)
3983 self._unclosed_elements.append(unclosed_elements)
3984
3985 def end(self, tag):
3986 if tag not in (_x('ttml:br'), 'br'):
3987 unclosed_elements = self._unclosed_elements.pop()
3988 for element in reversed(unclosed_elements):
3989 self._out += '</%s>' % element
3990 if unclosed_elements and self._applied_styles:
3991 self._applied_styles.pop()
3992
3993 def data(self, data):
3994 self._out += data
3995
3996 def close(self):
3997 return self._out.strip()
3998
3999 def parse_node(node):
4000 target = TTMLPElementParser()
4001 parser = xml.etree.ElementTree.XMLParser(target=target)
4002 parser.feed(xml.etree.ElementTree.tostring(node))
4003 return parser.close()
4004
4005 for k, v in LEGACY_NAMESPACES:
4006 for ns in v:
4007 dfxp_data = dfxp_data.replace(ns, k)
4008
4009 dfxp = compat_etree_fromstring(dfxp_data)
4010 out = []
4011 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4012
4013 if not paras:
4014 raise ValueError('Invalid dfxp/TTML subtitle')
4015
4016 repeat = False
4017 while True:
4018 for style in dfxp.findall(_x('.//ttml:style')):
4019 style_id = style.get('id') or style.get(_x('xml:id'))
4020 if not style_id:
4021 continue
4022 parent_style_id = style.get('style')
4023 if parent_style_id:
4024 if parent_style_id not in styles:
4025 repeat = True
4026 continue
4027 styles[style_id] = styles[parent_style_id].copy()
4028 for prop in SUPPORTED_STYLING:
4029 prop_val = style.get(_x('tts:' + prop))
4030 if prop_val:
4031 styles.setdefault(style_id, {})[prop] = prop_val
4032 if repeat:
4033 repeat = False
4034 else:
4035 break
4036
4037 for p in ('body', 'div'):
4038 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4039 if ele is None:
4040 continue
4041 style = styles.get(ele.get('style'))
4042 if not style:
4043 continue
4044 default_style.update(style)
4045
4046 for para, index in zip(paras, itertools.count(1)):
4047 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4048 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4049 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4050 if begin_time is None:
4051 continue
4052 if not end_time:
4053 if not dur:
4054 continue
4055 end_time = begin_time + dur
4056 out.append('%d\n%s --> %s\n%s\n\n' % (
4057 index,
4058 srt_subtitles_timecode(begin_time),
4059 srt_subtitles_timecode(end_time),
4060 parse_node(para)))
4061
4062 return ''.join(out)
4063
4064
4065 def cli_option(params, command_option, param, separator=None):
4066 param = params.get(param)
4067 return ([] if param is None
4068 else [command_option, str(param)] if separator is None
4069 else [f'{command_option}{separator}{param}'])
4070
4071
4072 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4073 param = params.get(param)
4074 assert param in (True, False, None)
4075 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
4076
4077
4078 def cli_valueless_option(params, command_option, param, expected_value=True):
4079 return [command_option] if params.get(param) == expected_value else []
4080
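# Usage sketch (illustrative, with hypothetical option names):
#   cli_option({'proxy': 'http://example.com'}, '--proxy', 'proxy')
#       # -> ['--proxy', 'http://example.com']
#   cli_option({'proxy': 'http://example.com'}, '--proxy', 'proxy', separator='=')
#       # -> ['--proxy=http://example.com']
#   cli_bool_option({'check': False}, '--check-cert', 'check')  # -> ['--check-cert', 'false']
#   cli_valueless_option({'quiet': True}, '--quiet', 'quiet')   # -> ['--quiet']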
4081
4082 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4083 if isinstance(argdict, (list, tuple)): # for backward compatibility
4084 if use_compat:
4085 return argdict
4086 else:
4087 argdict = None
4088 if argdict is None:
4089 return default
4090 assert isinstance(argdict, dict)
4091
4092 assert isinstance(keys, (list, tuple))
4093 for key_list in keys:
4094 arg_list = list(filter(
4095 lambda x: x is not None,
4096 [argdict.get(key.lower()) for key in variadic(key_list)]))
4097 if arg_list:
4098 return [arg for args in arg_list for arg in args]
4099 return default
4100
4101
4102 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
4103 main_key, exe = main_key.lower(), exe.lower()
4104 root_key = exe if main_key == exe else f'{main_key}+{exe}'
4105 keys = [f'{root_key}{k}' for k in (keys or [''])]
4106 if root_key in keys:
4107 if main_key != exe:
4108 keys.append((main_key, exe))
4109 keys.append('default')
4110 else:
4111 use_compat = False
4112 return cli_configuration_args(argdict, keys, default, use_compat)
4113
4114
4115 class ISO639Utils:
4116 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4117 _lang_map = {
4118 'aa': 'aar',
4119 'ab': 'abk',
4120 'ae': 'ave',
4121 'af': 'afr',
4122 'ak': 'aka',
4123 'am': 'amh',
4124 'an': 'arg',
4125 'ar': 'ara',
4126 'as': 'asm',
4127 'av': 'ava',
4128 'ay': 'aym',
4129 'az': 'aze',
4130 'ba': 'bak',
4131 'be': 'bel',
4132 'bg': 'bul',
4133 'bh': 'bih',
4134 'bi': 'bis',
4135 'bm': 'bam',
4136 'bn': 'ben',
4137 'bo': 'bod',
4138 'br': 'bre',
4139 'bs': 'bos',
4140 'ca': 'cat',
4141 'ce': 'che',
4142 'ch': 'cha',
4143 'co': 'cos',
4144 'cr': 'cre',
4145 'cs': 'ces',
4146 'cu': 'chu',
4147 'cv': 'chv',
4148 'cy': 'cym',
4149 'da': 'dan',
4150 'de': 'deu',
4151 'dv': 'div',
4152 'dz': 'dzo',
4153 'ee': 'ewe',
4154 'el': 'ell',
4155 'en': 'eng',
4156 'eo': 'epo',
4157 'es': 'spa',
4158 'et': 'est',
4159 'eu': 'eus',
4160 'fa': 'fas',
4161 'ff': 'ful',
4162 'fi': 'fin',
4163 'fj': 'fij',
4164 'fo': 'fao',
4165 'fr': 'fra',
4166 'fy': 'fry',
4167 'ga': 'gle',
4168 'gd': 'gla',
4169 'gl': 'glg',
4170 'gn': 'grn',
4171 'gu': 'guj',
4172 'gv': 'glv',
4173 'ha': 'hau',
4174 'he': 'heb',
4175 'iw': 'heb', # Replaced by he in 1989 revision
4176 'hi': 'hin',
4177 'ho': 'hmo',
4178 'hr': 'hrv',
4179 'ht': 'hat',
4180 'hu': 'hun',
4181 'hy': 'hye',
4182 'hz': 'her',
4183 'ia': 'ina',
4184 'id': 'ind',
4185 'in': 'ind', # Replaced by id in 1989 revision
4186 'ie': 'ile',
4187 'ig': 'ibo',
4188 'ii': 'iii',
4189 'ik': 'ipk',
4190 'io': 'ido',
4191 'is': 'isl',
4192 'it': 'ita',
4193 'iu': 'iku',
4194 'ja': 'jpn',
4195 'jv': 'jav',
4196 'ka': 'kat',
4197 'kg': 'kon',
4198 'ki': 'kik',
4199 'kj': 'kua',
4200 'kk': 'kaz',
4201 'kl': 'kal',
4202 'km': 'khm',
4203 'kn': 'kan',
4204 'ko': 'kor',
4205 'kr': 'kau',
4206 'ks': 'kas',
4207 'ku': 'kur',
4208 'kv': 'kom',
4209 'kw': 'cor',
4210 'ky': 'kir',
4211 'la': 'lat',
4212 'lb': 'ltz',
4213 'lg': 'lug',
4214 'li': 'lim',
4215 'ln': 'lin',
4216 'lo': 'lao',
4217 'lt': 'lit',
4218 'lu': 'lub',
4219 'lv': 'lav',
4220 'mg': 'mlg',
4221 'mh': 'mah',
4222 'mi': 'mri',
4223 'mk': 'mkd',
4224 'ml': 'mal',
4225 'mn': 'mon',
4226 'mr': 'mar',
4227 'ms': 'msa',
4228 'mt': 'mlt',
4229 'my': 'mya',
4230 'na': 'nau',
4231 'nb': 'nob',
4232 'nd': 'nde',
4233 'ne': 'nep',
4234 'ng': 'ndo',
4235 'nl': 'nld',
4236 'nn': 'nno',
4237 'no': 'nor',
4238 'nr': 'nbl',
4239 'nv': 'nav',
4240 'ny': 'nya',
4241 'oc': 'oci',
4242 'oj': 'oji',
4243 'om': 'orm',
4244 'or': 'ori',
4245 'os': 'oss',
4246 'pa': 'pan',
4247 'pi': 'pli',
4248 'pl': 'pol',
4249 'ps': 'pus',
4250 'pt': 'por',
4251 'qu': 'que',
4252 'rm': 'roh',
4253 'rn': 'run',
4254 'ro': 'ron',
4255 'ru': 'rus',
4256 'rw': 'kin',
4257 'sa': 'san',
4258 'sc': 'srd',
4259 'sd': 'snd',
4260 'se': 'sme',
4261 'sg': 'sag',
4262 'si': 'sin',
4263 'sk': 'slk',
4264 'sl': 'slv',
4265 'sm': 'smo',
4266 'sn': 'sna',
4267 'so': 'som',
4268 'sq': 'sqi',
4269 'sr': 'srp',
4270 'ss': 'ssw',
4271 'st': 'sot',
4272 'su': 'sun',
4273 'sv': 'swe',
4274 'sw': 'swa',
4275 'ta': 'tam',
4276 'te': 'tel',
4277 'tg': 'tgk',
4278 'th': 'tha',
4279 'ti': 'tir',
4280 'tk': 'tuk',
4281 'tl': 'tgl',
4282 'tn': 'tsn',
4283 'to': 'ton',
4284 'tr': 'tur',
4285 'ts': 'tso',
4286 'tt': 'tat',
4287 'tw': 'twi',
4288 'ty': 'tah',
4289 'ug': 'uig',
4290 'uk': 'ukr',
4291 'ur': 'urd',
4292 'uz': 'uzb',
4293 've': 'ven',
4294 'vi': 'vie',
4295 'vo': 'vol',
4296 'wa': 'wln',
4297 'wo': 'wol',
4298 'xh': 'xho',
4299 'yi': 'yid',
4300 'ji': 'yid', # Replaced by yi in 1989 revision
4301 'yo': 'yor',
4302 'za': 'zha',
4303 'zh': 'zho',
4304 'zu': 'zul',
4305 }
4306
4307 @classmethod
4308 def short2long(cls, code):
4309 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4310 return cls._lang_map.get(code[:2])
4311
4312 @classmethod
4313 def long2short(cls, code):
4314 """Convert language code from ISO 639-2/T to ISO 639-1"""
4315 for short_name, long_name in cls._lang_map.items():
4316 if long_name == code:
4317 return short_name
4318
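# Usage sketch (illustrative):
#   ISO639Utils.short2long('fr')   # -> 'fra'
#   ISO639Utils.long2short('deu')  # -> 'de'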
4319
4320 class ISO3166Utils:
4321 # From http://data.okfn.org/data/core/country-list
4322 _country_map = {
4323 'AF': 'Afghanistan',
4324 'AX': 'Åland Islands',
4325 'AL': 'Albania',
4326 'DZ': 'Algeria',
4327 'AS': 'American Samoa',
4328 'AD': 'Andorra',
4329 'AO': 'Angola',
4330 'AI': 'Anguilla',
4331 'AQ': 'Antarctica',
4332 'AG': 'Antigua and Barbuda',
4333 'AR': 'Argentina',
4334 'AM': 'Armenia',
4335 'AW': 'Aruba',
4336 'AU': 'Australia',
4337 'AT': 'Austria',
4338 'AZ': 'Azerbaijan',
4339 'BS': 'Bahamas',
4340 'BH': 'Bahrain',
4341 'BD': 'Bangladesh',
4342 'BB': 'Barbados',
4343 'BY': 'Belarus',
4344 'BE': 'Belgium',
4345 'BZ': 'Belize',
4346 'BJ': 'Benin',
4347 'BM': 'Bermuda',
4348 'BT': 'Bhutan',
4349 'BO': 'Bolivia, Plurinational State of',
4350 'BQ': 'Bonaire, Sint Eustatius and Saba',
4351 'BA': 'Bosnia and Herzegovina',
4352 'BW': 'Botswana',
4353 'BV': 'Bouvet Island',
4354 'BR': 'Brazil',
4355 'IO': 'British Indian Ocean Territory',
4356 'BN': 'Brunei Darussalam',
4357 'BG': 'Bulgaria',
4358 'BF': 'Burkina Faso',
4359 'BI': 'Burundi',
4360 'KH': 'Cambodia',
4361 'CM': 'Cameroon',
4362 'CA': 'Canada',
4363 'CV': 'Cape Verde',
4364 'KY': 'Cayman Islands',
4365 'CF': 'Central African Republic',
4366 'TD': 'Chad',
4367 'CL': 'Chile',
4368 'CN': 'China',
4369 'CX': 'Christmas Island',
4370 'CC': 'Cocos (Keeling) Islands',
4371 'CO': 'Colombia',
4372 'KM': 'Comoros',
4373 'CG': 'Congo',
4374 'CD': 'Congo, the Democratic Republic of the',
4375 'CK': 'Cook Islands',
4376 'CR': 'Costa Rica',
4377 'CI': 'Côte d\'Ivoire',
4378 'HR': 'Croatia',
4379 'CU': 'Cuba',
4380 'CW': 'Curaçao',
4381 'CY': 'Cyprus',
4382 'CZ': 'Czech Republic',
4383 'DK': 'Denmark',
4384 'DJ': 'Djibouti',
4385 'DM': 'Dominica',
4386 'DO': 'Dominican Republic',
4387 'EC': 'Ecuador',
4388 'EG': 'Egypt',
4389 'SV': 'El Salvador',
4390 'GQ': 'Equatorial Guinea',
4391 'ER': 'Eritrea',
4392 'EE': 'Estonia',
4393 'ET': 'Ethiopia',
4394 'FK': 'Falkland Islands (Malvinas)',
4395 'FO': 'Faroe Islands',
4396 'FJ': 'Fiji',
4397 'FI': 'Finland',
4398 'FR': 'France',
4399 'GF': 'French Guiana',
4400 'PF': 'French Polynesia',
4401 'TF': 'French Southern Territories',
4402 'GA': 'Gabon',
4403 'GM': 'Gambia',
4404 'GE': 'Georgia',
4405 'DE': 'Germany',
4406 'GH': 'Ghana',
4407 'GI': 'Gibraltar',
4408 'GR': 'Greece',
4409 'GL': 'Greenland',
4410 'GD': 'Grenada',
4411 'GP': 'Guadeloupe',
4412 'GU': 'Guam',
4413 'GT': 'Guatemala',
4414 'GG': 'Guernsey',
4415 'GN': 'Guinea',
4416 'GW': 'Guinea-Bissau',
4417 'GY': 'Guyana',
4418 'HT': 'Haiti',
4419 'HM': 'Heard Island and McDonald Islands',
4420 'VA': 'Holy See (Vatican City State)',
4421 'HN': 'Honduras',
4422 'HK': 'Hong Kong',
4423 'HU': 'Hungary',
4424 'IS': 'Iceland',
4425 'IN': 'India',
4426 'ID': 'Indonesia',
4427 'IR': 'Iran, Islamic Republic of',
4428 'IQ': 'Iraq',
4429 'IE': 'Ireland',
4430 'IM': 'Isle of Man',
4431 'IL': 'Israel',
4432 'IT': 'Italy',
4433 'JM': 'Jamaica',
4434 'JP': 'Japan',
4435 'JE': 'Jersey',
4436 'JO': 'Jordan',
4437 'KZ': 'Kazakhstan',
4438 'KE': 'Kenya',
4439 'KI': 'Kiribati',
4440 'KP': 'Korea, Democratic People\'s Republic of',
4441 'KR': 'Korea, Republic of',
4442 'KW': 'Kuwait',
4443 'KG': 'Kyrgyzstan',
4444 'LA': 'Lao People\'s Democratic Republic',
4445 'LV': 'Latvia',
4446 'LB': 'Lebanon',
4447 'LS': 'Lesotho',
4448 'LR': 'Liberia',
4449 'LY': 'Libya',
4450 'LI': 'Liechtenstein',
4451 'LT': 'Lithuania',
4452 'LU': 'Luxembourg',
4453 'MO': 'Macao',
4454 'MK': 'Macedonia, the Former Yugoslav Republic of',
4455 'MG': 'Madagascar',
4456 'MW': 'Malawi',
4457 'MY': 'Malaysia',
4458 'MV': 'Maldives',
4459 'ML': 'Mali',
4460 'MT': 'Malta',
4461 'MH': 'Marshall Islands',
4462 'MQ': 'Martinique',
4463 'MR': 'Mauritania',
4464 'MU': 'Mauritius',
4465 'YT': 'Mayotte',
4466 'MX': 'Mexico',
4467 'FM': 'Micronesia, Federated States of',
4468 'MD': 'Moldova, Republic of',
4469 'MC': 'Monaco',
4470 'MN': 'Mongolia',
4471 'ME': 'Montenegro',
4472 'MS': 'Montserrat',
4473 'MA': 'Morocco',
4474 'MZ': 'Mozambique',
4475 'MM': 'Myanmar',
4476 'NA': 'Namibia',
4477 'NR': 'Nauru',
4478 'NP': 'Nepal',
4479 'NL': 'Netherlands',
4480 'NC': 'New Caledonia',
4481 'NZ': 'New Zealand',
4482 'NI': 'Nicaragua',
4483 'NE': 'Niger',
4484 'NG': 'Nigeria',
4485 'NU': 'Niue',
4486 'NF': 'Norfolk Island',
4487 'MP': 'Northern Mariana Islands',
4488 'NO': 'Norway',
4489 'OM': 'Oman',
4490 'PK': 'Pakistan',
4491 'PW': 'Palau',
4492 'PS': 'Palestine, State of',
4493 'PA': 'Panama',
4494 'PG': 'Papua New Guinea',
4495 'PY': 'Paraguay',
4496 'PE': 'Peru',
4497 'PH': 'Philippines',
4498 'PN': 'Pitcairn',
4499 'PL': 'Poland',
4500 'PT': 'Portugal',
4501 'PR': 'Puerto Rico',
4502 'QA': 'Qatar',
4503 'RE': 'Réunion',
4504 'RO': 'Romania',
4505 'RU': 'Russian Federation',
4506 'RW': 'Rwanda',
4507 'BL': 'Saint Barthélemy',
4508 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4509 'KN': 'Saint Kitts and Nevis',
4510 'LC': 'Saint Lucia',
4511 'MF': 'Saint Martin (French part)',
4512 'PM': 'Saint Pierre and Miquelon',
4513 'VC': 'Saint Vincent and the Grenadines',
4514 'WS': 'Samoa',
4515 'SM': 'San Marino',
4516 'ST': 'Sao Tome and Principe',
4517 'SA': 'Saudi Arabia',
4518 'SN': 'Senegal',
4519 'RS': 'Serbia',
4520 'SC': 'Seychelles',
4521 'SL': 'Sierra Leone',
4522 'SG': 'Singapore',
4523 'SX': 'Sint Maarten (Dutch part)',
4524 'SK': 'Slovakia',
4525 'SI': 'Slovenia',
4526 'SB': 'Solomon Islands',
4527 'SO': 'Somalia',
4528 'ZA': 'South Africa',
4529 'GS': 'South Georgia and the South Sandwich Islands',
4530 'SS': 'South Sudan',
4531 'ES': 'Spain',
4532 'LK': 'Sri Lanka',
4533 'SD': 'Sudan',
4534 'SR': 'Suriname',
4535 'SJ': 'Svalbard and Jan Mayen',
4536 'SZ': 'Swaziland',
4537 'SE': 'Sweden',
4538 'CH': 'Switzerland',
4539 'SY': 'Syrian Arab Republic',
4540 'TW': 'Taiwan, Province of China',
4541 'TJ': 'Tajikistan',
4542 'TZ': 'Tanzania, United Republic of',
4543 'TH': 'Thailand',
4544 'TL': 'Timor-Leste',
4545 'TG': 'Togo',
4546 'TK': 'Tokelau',
4547 'TO': 'Tonga',
4548 'TT': 'Trinidad and Tobago',
4549 'TN': 'Tunisia',
4550 'TR': 'Turkey',
4551 'TM': 'Turkmenistan',
4552 'TC': 'Turks and Caicos Islands',
4553 'TV': 'Tuvalu',
4554 'UG': 'Uganda',
4555 'UA': 'Ukraine',
4556 'AE': 'United Arab Emirates',
4557 'GB': 'United Kingdom',
4558 'US': 'United States',
4559 'UM': 'United States Minor Outlying Islands',
4560 'UY': 'Uruguay',
4561 'UZ': 'Uzbekistan',
4562 'VU': 'Vanuatu',
4563 'VE': 'Venezuela, Bolivarian Republic of',
4564 'VN': 'Viet Nam',
4565 'VG': 'Virgin Islands, British',
4566 'VI': 'Virgin Islands, U.S.',
4567 'WF': 'Wallis and Futuna',
4568 'EH': 'Western Sahara',
4569 'YE': 'Yemen',
4570 'ZM': 'Zambia',
4571 'ZW': 'Zimbabwe',
4572 # Not ISO 3166 codes, but used for IP blocks
4573 'AP': 'Asia/Pacific Region',
4574 'EU': 'Europe',
4575 }
4576
4577 @classmethod
4578 def short2full(cls, code):
4579 """Convert an ISO 3166-2 country code to the corresponding full name"""
4580 return cls._country_map.get(code.upper())
4581
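# Usage sketch (illustrative):
#   ISO3166Utils.short2full('de')  # -> 'Germany' (lookup is case-insensitive)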
4582
4583 class GeoUtils:
4584 # Major IPv4 address blocks per country
4585 _country_ip_map = {
4586 'AD': '46.172.224.0/19',
4587 'AE': '94.200.0.0/13',
4588 'AF': '149.54.0.0/17',
4589 'AG': '209.59.64.0/18',
4590 'AI': '204.14.248.0/21',
4591 'AL': '46.99.0.0/16',
4592 'AM': '46.70.0.0/15',
4593 'AO': '105.168.0.0/13',
4594 'AP': '182.50.184.0/21',
4595 'AQ': '23.154.160.0/24',
4596 'AR': '181.0.0.0/12',
4597 'AS': '202.70.112.0/20',
4598 'AT': '77.116.0.0/14',
4599 'AU': '1.128.0.0/11',
4600 'AW': '181.41.0.0/18',
4601 'AX': '185.217.4.0/22',
4602 'AZ': '5.197.0.0/16',
4603 'BA': '31.176.128.0/17',
4604 'BB': '65.48.128.0/17',
4605 'BD': '114.130.0.0/16',
4606 'BE': '57.0.0.0/8',
4607 'BF': '102.178.0.0/15',
4608 'BG': '95.42.0.0/15',
4609 'BH': '37.131.0.0/17',
4610 'BI': '154.117.192.0/18',
4611 'BJ': '137.255.0.0/16',
4612 'BL': '185.212.72.0/23',
4613 'BM': '196.12.64.0/18',
4614 'BN': '156.31.0.0/16',
4615 'BO': '161.56.0.0/16',
4616 'BQ': '161.0.80.0/20',
4617 'BR': '191.128.0.0/12',
4618 'BS': '24.51.64.0/18',
4619 'BT': '119.2.96.0/19',
4620 'BW': '168.167.0.0/16',
4621 'BY': '178.120.0.0/13',
4622 'BZ': '179.42.192.0/18',
4623 'CA': '99.224.0.0/11',
4624 'CD': '41.243.0.0/16',
4625 'CF': '197.242.176.0/21',
4626 'CG': '160.113.0.0/16',
4627 'CH': '85.0.0.0/13',
4628 'CI': '102.136.0.0/14',
4629 'CK': '202.65.32.0/19',
4630 'CL': '152.172.0.0/14',
4631 'CM': '102.244.0.0/14',
4632 'CN': '36.128.0.0/10',
4633 'CO': '181.240.0.0/12',
4634 'CR': '201.192.0.0/12',
4635 'CU': '152.206.0.0/15',
4636 'CV': '165.90.96.0/19',
4637 'CW': '190.88.128.0/17',
4638 'CY': '31.153.0.0/16',
4639 'CZ': '88.100.0.0/14',
4640 'DE': '53.0.0.0/8',
4641 'DJ': '197.241.0.0/17',
4642 'DK': '87.48.0.0/12',
4643 'DM': '192.243.48.0/20',
4644 'DO': '152.166.0.0/15',
4645 'DZ': '41.96.0.0/12',
4646 'EC': '186.68.0.0/15',
4647 'EE': '90.190.0.0/15',
4648 'EG': '156.160.0.0/11',
4649 'ER': '196.200.96.0/20',
4650 'ES': '88.0.0.0/11',
4651 'ET': '196.188.0.0/14',
4652 'EU': '2.16.0.0/13',
4653 'FI': '91.152.0.0/13',
4654 'FJ': '144.120.0.0/16',
4655 'FK': '80.73.208.0/21',
4656 'FM': '119.252.112.0/20',
4657 'FO': '88.85.32.0/19',
4658 'FR': '90.0.0.0/9',
4659 'GA': '41.158.0.0/15',
4660 'GB': '25.0.0.0/8',
4661 'GD': '74.122.88.0/21',
4662 'GE': '31.146.0.0/16',
4663 'GF': '161.22.64.0/18',
4664 'GG': '62.68.160.0/19',
4665 'GH': '154.160.0.0/12',
4666 'GI': '95.164.0.0/16',
4667 'GL': '88.83.0.0/19',
4668 'GM': '160.182.0.0/15',
4669 'GN': '197.149.192.0/18',
4670 'GP': '104.250.0.0/19',
4671 'GQ': '105.235.224.0/20',
4672 'GR': '94.64.0.0/13',
4673 'GT': '168.234.0.0/16',
4674 'GU': '168.123.0.0/16',
4675 'GW': '197.214.80.0/20',
4676 'GY': '181.41.64.0/18',
4677 'HK': '113.252.0.0/14',
4678 'HN': '181.210.0.0/16',
4679 'HR': '93.136.0.0/13',
4680 'HT': '148.102.128.0/17',
4681 'HU': '84.0.0.0/14',
4682 'ID': '39.192.0.0/10',
4683 'IE': '87.32.0.0/12',
4684 'IL': '79.176.0.0/13',
4685 'IM': '5.62.80.0/20',
4686 'IN': '117.192.0.0/10',
4687 'IO': '203.83.48.0/21',
4688 'IQ': '37.236.0.0/14',
4689 'IR': '2.176.0.0/12',
4690 'IS': '82.221.0.0/16',
4691 'IT': '79.0.0.0/10',
4692 'JE': '87.244.64.0/18',
4693 'JM': '72.27.0.0/17',
4694 'JO': '176.29.0.0/16',
4695 'JP': '133.0.0.0/8',
4696 'KE': '105.48.0.0/12',
4697 'KG': '158.181.128.0/17',
4698 'KH': '36.37.128.0/17',
4699 'KI': '103.25.140.0/22',
4700 'KM': '197.255.224.0/20',
4701 'KN': '198.167.192.0/19',
4702 'KP': '175.45.176.0/22',
4703 'KR': '175.192.0.0/10',
4704 'KW': '37.36.0.0/14',
4705 'KY': '64.96.0.0/15',
4706 'KZ': '2.72.0.0/13',
4707 'LA': '115.84.64.0/18',
4708 'LB': '178.135.0.0/16',
4709 'LC': '24.92.144.0/20',
4710 'LI': '82.117.0.0/19',
4711 'LK': '112.134.0.0/15',
4712 'LR': '102.183.0.0/16',
4713 'LS': '129.232.0.0/17',
4714 'LT': '78.56.0.0/13',
4715 'LU': '188.42.0.0/16',
4716 'LV': '46.109.0.0/16',
4717 'LY': '41.252.0.0/14',
4718 'MA': '105.128.0.0/11',
4719 'MC': '88.209.64.0/18',
4720 'MD': '37.246.0.0/16',
4721 'ME': '178.175.0.0/17',
4722 'MF': '74.112.232.0/21',
4723 'MG': '154.126.0.0/17',
4724 'MH': '117.103.88.0/21',
4725 'MK': '77.28.0.0/15',
4726 'ML': '154.118.128.0/18',
4727 'MM': '37.111.0.0/17',
4728 'MN': '49.0.128.0/17',
4729 'MO': '60.246.0.0/16',
4730 'MP': '202.88.64.0/20',
4731 'MQ': '109.203.224.0/19',
4732 'MR': '41.188.64.0/18',
4733 'MS': '208.90.112.0/22',
4734 'MT': '46.11.0.0/16',
4735 'MU': '105.16.0.0/12',
4736 'MV': '27.114.128.0/18',
4737 'MW': '102.70.0.0/15',
4738 'MX': '187.192.0.0/11',
4739 'MY': '175.136.0.0/13',
4740 'MZ': '197.218.0.0/15',
4741 'NA': '41.182.0.0/16',
4742 'NC': '101.101.0.0/18',
4743 'NE': '197.214.0.0/18',
4744 'NF': '203.17.240.0/22',
4745 'NG': '105.112.0.0/12',
4746 'NI': '186.76.0.0/15',
4747 'NL': '145.96.0.0/11',
4748 'NO': '84.208.0.0/13',
4749 'NP': '36.252.0.0/15',
4750 'NR': '203.98.224.0/19',
4751 'NU': '49.156.48.0/22',
4752 'NZ': '49.224.0.0/14',
4753 'OM': '5.36.0.0/15',
4754 'PA': '186.72.0.0/15',
4755 'PE': '186.160.0.0/14',
4756 'PF': '123.50.64.0/18',
4757 'PG': '124.240.192.0/19',
4758 'PH': '49.144.0.0/13',
4759 'PK': '39.32.0.0/11',
4760 'PL': '83.0.0.0/11',
4761 'PM': '70.36.0.0/20',
4762 'PR': '66.50.0.0/16',
4763 'PS': '188.161.0.0/16',
4764 'PT': '85.240.0.0/13',
4765 'PW': '202.124.224.0/20',
4766 'PY': '181.120.0.0/14',
4767 'QA': '37.210.0.0/15',
4768 'RE': '102.35.0.0/16',
4769 'RO': '79.112.0.0/13',
4770 'RS': '93.86.0.0/15',
4771 'RU': '5.136.0.0/13',
4772 'RW': '41.186.0.0/16',
4773 'SA': '188.48.0.0/13',
4774 'SB': '202.1.160.0/19',
4775 'SC': '154.192.0.0/11',
4776 'SD': '102.120.0.0/13',
4777 'SE': '78.64.0.0/12',
4778 'SG': '8.128.0.0/10',
4779 'SI': '188.196.0.0/14',
4780 'SK': '78.98.0.0/15',
4781 'SL': '102.143.0.0/17',
4782 'SM': '89.186.32.0/19',
4783 'SN': '41.82.0.0/15',
4784 'SO': '154.115.192.0/18',
4785 'SR': '186.179.128.0/17',
4786 'SS': '105.235.208.0/21',
4787 'ST': '197.159.160.0/19',
4788 'SV': '168.243.0.0/16',
4789 'SX': '190.102.0.0/20',
4790 'SY': '5.0.0.0/16',
4791 'SZ': '41.84.224.0/19',
4792 'TC': '65.255.48.0/20',
4793 'TD': '154.68.128.0/19',
4794 'TG': '196.168.0.0/14',
4795 'TH': '171.96.0.0/13',
4796 'TJ': '85.9.128.0/18',
4797 'TK': '27.96.24.0/21',
4798 'TL': '180.189.160.0/20',
4799 'TM': '95.85.96.0/19',
4800 'TN': '197.0.0.0/11',
4801 'TO': '175.176.144.0/21',
4802 'TR': '78.160.0.0/11',
4803 'TT': '186.44.0.0/15',
4804 'TV': '202.2.96.0/19',
4805 'TW': '120.96.0.0/11',
4806 'TZ': '156.156.0.0/14',
4807 'UA': '37.52.0.0/14',
4808 'UG': '102.80.0.0/13',
4809 'US': '6.0.0.0/8',
4810 'UY': '167.56.0.0/13',
4811 'UZ': '84.54.64.0/18',
4812 'VA': '212.77.0.0/19',
4813 'VC': '207.191.240.0/21',
4814 'VE': '186.88.0.0/13',
4815 'VG': '66.81.192.0/20',
4816 'VI': '146.226.0.0/16',
4817 'VN': '14.160.0.0/11',
4818 'VU': '202.80.32.0/20',
4819 'WF': '117.20.32.0/21',
4820 'WS': '202.4.32.0/19',
4821 'YE': '134.35.0.0/16',
4822 'YT': '41.242.116.0/22',
4823 'ZA': '41.0.0.0/11',
4824 'ZM': '102.144.0.0/13',
4825 'ZW': '102.177.192.0/18',
4826 }
4827
4828 @classmethod
4829 def random_ipv4(cls, code_or_block):
4830 if len(code_or_block) == 2:
4831 block = cls._country_ip_map.get(code_or_block.upper())
4832 if not block:
4833 return None
4834 else:
4835 block = code_or_block
4836 addr, preflen = block.split('/')
4837 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4838 addr_max = addr_min | (0xffffffff >> int(preflen))
4839 return str(socket.inet_ntoa(
4840 struct.pack('!L', random.randint(addr_min, addr_max))))
4841
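# Usage sketch (illustrative): a two-letter argument is treated as a country
# code, anything else as an explicit CIDR block:
#   GeoUtils.random_ipv4('DE')            # -> a random address in 53.0.0.0/8
#   GeoUtils.random_ipv4('192.0.2.0/24')  # -> e.g. '192.0.2.57' (random)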
4842
4843 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4844 def __init__(self, proxies=None):
4845 # Set default handlers
4846 for type in ('http', 'https'):
4847 setattr(self, '%s_open' % type,
4848 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4849 meth(r, proxy, type))
4850 urllib.request.ProxyHandler.__init__(self, proxies)
4851
4852 def proxy_open(self, req, proxy, type):
4853 req_proxy = req.headers.get('Ytdl-request-proxy')
4854 if req_proxy is not None:
4855 proxy = req_proxy
4856 del req.headers['Ytdl-request-proxy']
4857
4858 if proxy == '__noproxy__':
4859 return None # No Proxy
4860 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4861 req.add_header('Ytdl-socks-proxy', proxy)
4862 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
4863 return None
4864 return urllib.request.ProxyHandler.proxy_open(
4865 self, req, proxy, type)
4866
4867
4868 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4869 # released into Public Domain
4870 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4871
4872 def long_to_bytes(n, blocksize=0):
4873 """long_to_bytes(n:long, blocksize:int) : string
4874 Convert a long integer to a byte string.
4875
4876 If optional blocksize is given and greater than zero, pad the front of the
4877 byte string with binary zeros so that the length is a multiple of
4878 blocksize.
4879 """
4880 # after much testing, this algorithm was deemed to be the fastest
4881 s = b''
4882 n = int(n)
4883 while n > 0:
4884 s = struct.pack('>I', n & 0xffffffff) + s
4885 n = n >> 32
4886 # strip off leading zeros
4887 for i in range(len(s)):
4888 if s[i] != b'\000'[0]:
4889 break
4890 else:
4891 # only happens when n == 0
4892 s = b'\000'
4893 i = 0
4894 s = s[i:]
4895 # add back some pad bytes. this could be done more efficiently w.r.t. the
4896 # de-padding being done above, but sigh...
4897 if blocksize > 0 and len(s) % blocksize:
4898 s = (blocksize - len(s) % blocksize) * b'\000' + s
4899 return s
4900
4901
4902 def bytes_to_long(s):
4903 """bytes_to_long(string) : long
4904 Convert a byte string to a long integer.
4905
4906 This is (essentially) the inverse of long_to_bytes().
4907 """
4908 acc = 0
4909 length = len(s)
4910 if length % 4:
4911 extra = (4 - length % 4)
4912 s = b'\000' * extra + s
4913 length = length + extra
4914 for i in range(0, length, 4):
4915 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4916 return acc
4917
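# Usage sketch (illustrative): the two functions are inverses of each other:
#   long_to_bytes(256)             # -> b'\x01\x00'
#   long_to_bytes(1, blocksize=4)  # -> b'\x00\x00\x00\x01'
#   bytes_to_long(b'\x01\x00')     # -> 256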
4918
4919 def ohdave_rsa_encrypt(data, exponent, modulus):
4920 '''
4921 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4922
4923 Input:
4924 data: data to encrypt, bytes-like object
4925 exponent, modulus: parameter e and N of RSA algorithm, both integer
4926 Output: hex string of encrypted data
4927
4928 Limitation: supports one block encryption only
4929 '''
4930
4931 payload = int(binascii.hexlify(data[::-1]), 16)
4932 encrypted = pow(payload, exponent, modulus)
4933 return '%x' % encrypted
4934
4935
4936 def pkcs1pad(data, length):
4937 """
4938 Padding input data with PKCS#1 scheme
4939
4940 @param {int[]} data input data
4941 @param {int} length target length
4942 @returns {int[]} padded data
4943 """
4944 if len(data) > length - 11:
4945 raise ValueError('Input data too long for PKCS#1 padding')
4946
4947 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
4948 return [0, 2] + pseudo_random + [0] + data
4949
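# Usage sketch (illustrative): the result always has `length` items, framed
# as [0, 2, <random padding>, 0, <data>]:
#   len(pkcs1pad([0x2a], 16))  # -> 16
#   pkcs1pad([0x2a], 16)[:2]   # -> [0, 2]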
4950
4951 def _base_n_table(n, table):
4952 if not table and not n:
4953 raise ValueError('Either table or n must be specified')
4954 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4955
4956 if n and n != len(table):
4957 raise ValueError(f'base {n} exceeds table length {len(table)}')
4958 return table
4959
4960
4961 def encode_base_n(num, n=None, table=None):
4962 """Convert given int to a base-n string"""
4963 table = _base_n_table(n, table)
4964 if not num:
4965 return table[0]
4966
4967 result, base = '', len(table)
4968 while num:
4969 result = table[num % base] + result
4970 num = num // base
4971 return result
4972
4973
4974 def decode_base_n(string, n=None, table=None):
4975 """Convert given base-n string to int"""
4976 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4977 result, base = 0, len(table)
4978 for char in string:
4979 result = result * base + table[char]
4980 return result
4981
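# Usage sketch (illustrative): round-tripping through the default table:
#   encode_base_n(255, 16)   # -> 'ff'
#   decode_base_n('ff', 16)  # -> 255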
4982
4983 def decode_base(value, digits):
4984 deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
4985 f'in a future version. Use {__name__}.decode_base_n instead')
4986 return decode_base_n(value, table=digits)
4987
4988
4989 def decode_packed_codes(code):
4990 mobj = re.search(PACKED_CODES_RE, code)
4991 obfuscated_code, base, count, symbols = mobj.groups()
4992 base = int(base)
4993 count = int(count)
4994 symbols = symbols.split('|')
4995 symbol_table = {}
4996
4997 while count:
4998 count -= 1
4999 base_n_count = encode_base_n(count, base)
5000 symbol_table[base_n_count] = symbols[count] or base_n_count
5001
5002 return re.sub(
5003 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5004 obfuscated_code)
5005
5006
5007 def caesar(s, alphabet, shift):
5008 if shift == 0:
5009 return s
5010 l = len(alphabet)
5011 return ''.join(
5012 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5013 for c in s)
5014
5015
5016 def rot47(s):
5017 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5018
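# Usage sketch (illustrative): characters outside the alphabet pass through
# unchanged, and rot47 (a shift of 47 over 94 characters) is its own inverse:
#   caesar('ab c', 'abc', 1)  # -> 'bc a'  (space is left as-is)
#   rot47(rot47('yt-dlp'))    # -> 'yt-dlp'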
5019
5020 def parse_m3u8_attributes(attrib):
5021 info = {}
5022 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5023 if val.startswith('"'):
5024 val = val[1:-1]
5025 info[key] = val
5026 return info
5027
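# Usage sketch (illustrative), assuming an example EXT-X-STREAM-INF attribute list:
#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.64001f"')
#       # -> {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.64001f'}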
5028
5029 def urshift(val, n):
5030 return val >> n if val >= 0 else (val + 0x100000000) >> n
5031
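# Usage sketch (illustrative): emulates an unsigned 32-bit right shift (like
# JavaScript's `>>>`) for values that may be negative:
#   urshift(-1, 28)  # -> 15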
5032
5033 # Based on png2str() written by @gdkchan and improved by @yokrysty
5034 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5035 def decode_png(png_data):
5036 # Reference: https://www.w3.org/TR/PNG/
5037 header = png_data[8:]
5038
5039 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5040 raise OSError('Not a valid PNG file.')
5041
5042 int_map = {1: '>B', 2: '>H', 4: '>I'}
5043 unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
5044
5045 chunks = []
5046
5047 while header:
5048 length = unpack_integer(header[:4])
5049 header = header[4:]
5050
5051 chunk_type = header[:4]
5052 header = header[4:]
5053
5054 chunk_data = header[:length]
5055 header = header[length:]
5056
5057 header = header[4:] # Skip CRC
5058
5059 chunks.append({
5060 'type': chunk_type,
5061 'length': length,
5062 'data': chunk_data
5063 })
5064
5065 ihdr = chunks[0]['data']
5066
5067 width = unpack_integer(ihdr[:4])
5068 height = unpack_integer(ihdr[4:8])
5069
5070 idat = b''
5071
5072 for chunk in chunks:
5073 if chunk['type'] == b'IDAT':
5074 idat += chunk['data']
5075
5076 if not idat:
5077 raise OSError('Unable to read PNG data.')
5078
5079 decompressed_data = bytearray(zlib.decompress(idat))
5080
5081 stride = width * 3
5082 pixels = []
5083
5084 def _get_pixel(idx):
5085 x = idx % stride
5086 y = idx // stride
5087 return pixels[y][x]
5088
5089 for y in range(height):
5090 basePos = y * (1 + stride)
5091 filter_type = decompressed_data[basePos]
5092
5093 current_row = []
5094
5095 pixels.append(current_row)
5096
5097 for x in range(stride):
5098 color = decompressed_data[1 + basePos + x]
5099 basex = y * stride + x
5100 left = 0
5101 up = 0
5102
5103 if x > 2:
5104 left = _get_pixel(basex - 3)
5105 if y > 0:
5106 up = _get_pixel(basex - stride)
5107
5108 if filter_type == 1: # Sub
5109 color = (color + left) & 0xff
5110 elif filter_type == 2: # Up
5111 color = (color + up) & 0xff
5112 elif filter_type == 3: # Average
5113 color = (color + ((left + up) >> 1)) & 0xff
5114 elif filter_type == 4: # Paeth
5115 a = left
5116 b = up
5117 c = 0
5118
5119 if x > 2 and y > 0:
5120 c = _get_pixel(basex - stride - 3)
5121
5122 p = a + b - c
5123
5124 pa = abs(p - a)
5125 pb = abs(p - b)
5126 pc = abs(p - c)
5127
5128 if pa <= pb and pa <= pc:
5129 color = (color + a) & 0xff
5130 elif pb <= pc:
5131 color = (color + b) & 0xff
5132 else:
5133 color = (color + c) & 0xff
5134
5135 current_row.append(color)
5136
5137 return width, height, pixels
5138
5139
5140 def write_xattr(path, key, value):
5141 # Windows: Write xattrs to NTFS Alternate Data Streams:
5142 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5143 if compat_os_name == 'nt':
5144 assert ':' not in key
5145 assert os.path.exists(path)
5146
5147 try:
5148 with open(f'{path}:{key}', 'wb') as f:
5149 f.write(value)
5150 except OSError as e:
5151 raise XAttrMetadataError(e.errno, e.strerror)
5152 return
5153
5154 # UNIX Method 1. Use xattrs/pyxattrs modules
5155
5156 setxattr = None
5157 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5158 # Unicode arguments are not supported in pyxattr until version 0.5.0
5159 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5160 if version_tuple(xattr.__version__) >= (0, 5, 0):
5161 setxattr = xattr.set
5162 elif xattr:
5163 setxattr = xattr.setxattr
5164
5165 if setxattr:
5166 try:
5167 setxattr(path, key, value)
5168 except OSError as e:
5169 raise XAttrMetadataError(e.errno, e.strerror)
5170 return
5171
5172 # UNIX Method 2. Use setfattr/xattr executables
5173 exe = ('setfattr' if check_executable('setfattr', ['--version'])
5174 else 'xattr' if check_executable('xattr', ['-h']) else None)
5175 if not exe:
5176 raise XAttrUnavailableError(
5177 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5178 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5179
5180 value = value.decode()
5181 try:
5182 _, stderr, returncode = Popen.run(
5183 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5184 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5185 except OSError as e:
5186 raise XAttrMetadataError(e.errno, e.strerror)
5187 if returncode:
5188 raise XAttrMetadataError(returncode, stderr)
5189
5190
5191 def random_birthday(year_field, month_field, day_field):
5192 start_date = datetime.date(1950, 1, 1)
5193 end_date = datetime.date(1995, 12, 31)
5194 offset = random.randint(0, (end_date - start_date).days)
5195 random_date = start_date + datetime.timedelta(offset)
5196 return {
5197 year_field: str(random_date.year),
5198 month_field: str(random_date.month),
5199 day_field: str(random_date.day),
5200 }
5201
5202
5203 # Templates for internet shortcut files, which are plain text files.
5204 DOT_URL_LINK_TEMPLATE = '''\
5205 [InternetShortcut]
5206 URL=%(url)s
5207 '''
5208
5209 DOT_WEBLOC_LINK_TEMPLATE = '''\
5210 <?xml version="1.0" encoding="UTF-8"?>
5211 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5212 <plist version="1.0">
5213 <dict>
5214 \t<key>URL</key>
5215 \t<string>%(url)s</string>
5216 </dict>
5217 </plist>
5218 '''
5219
5220 DOT_DESKTOP_LINK_TEMPLATE = '''\
5221 [Desktop Entry]
5222 Encoding=UTF-8
5223 Name=%(filename)s
5224 Type=Link
5225 URL=%(url)s
5226 Icon=text-html
5227 '''
5228
5229 LINK_TEMPLATES = {
5230 'url': DOT_URL_LINK_TEMPLATE,
5231 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5232 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5233 }
5234
5235
5236 def iri_to_uri(iri):
5237 """
5238 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5239
5240 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes any not-yet-escaped characters using an underlying UTF-8 encoding, leaving existing escapes intact.
5241 """
5242
5243 iri_parts = urllib.parse.urlparse(iri)
5244
5245 if '[' in iri_parts.netloc:
5246 raise ValueError('IPv6 URIs are not yet supported.')
5247 # Querying `.netloc` also raises a ValueError when there is only one bracket.
5248
5249 # The `safe` argument values that the following code uses contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5250
5251 net_location = ''
5252 if iri_parts.username:
5253 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5254 if iri_parts.password is not None:
5255 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5256 net_location += '@'
5257
5258 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5259 # The 'idna' encoding produces ASCII text.
5260 if iri_parts.port is not None and iri_parts.port != 80:
5261 net_location += ':' + str(iri_parts.port)
5262
5263 return urllib.parse.urlunparse(
5264 (iri_parts.scheme,
5265 net_location,
5266
5267 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5268
5269 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5270 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5271
5272 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5273 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5274
5275 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5276
5277 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5278
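# Usage sketch (illustrative, with a hypothetical path and query):
#   iri_to_uri('https://example.com/fö?bär=1')
#       # -> 'https://example.com/f%C3%B6?b%C3%A4r=1'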
5279
5280 def to_high_limit_path(path):
5281 if sys.platform in ['win32', 'cygwin']:
5282 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5283 return '\\\\?\\' + os.path.abspath(path)
5284
5285 return path
5286
5287
5288 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5289 val = traverse_obj(obj, *variadic(field))
5290 if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5291 return default
5292 return template % func(val)
5293
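# Usage sketch (illustrative):
#   format_field({'width': 1280}, 'width', '%spx')              # -> '1280px'
#   format_field({}, 'width', '%spx', default='unknown width')  # -> 'unknown width'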
5294
5295 def clean_podcast_url(url):
5296 return re.sub(r'''(?x)
5297 (?:
5298 (?:
5299 chtbl\.com/track|
5300 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5301 play\.podtrac\.com
5302 )/[^/]+|
5303 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5304 flex\.acast\.com|
5305 pd(?:
5306 cn\.co| # https://podcorn.com/analytics-prefix/
5307 st\.fm # https://podsights.com/docs/
5308 )/e
5309 )/''', '', url)
5310
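# Usage sketch (illustrative, with a hypothetical tracking prefix and host):
#   clean_podcast_url('https://chtbl.com/track/12345/traffic.example.com/ep1.mp3')
#       # -> 'https://traffic.example.com/ep1.mp3'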
5311
5312 _HEX_TABLE = '0123456789abcdef'
5313
5314
5315 def random_uuidv4():
5316 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5317
5318
5319 def make_dir(path, to_screen=None):
5320 try:
5321 dn = os.path.dirname(path)
5322 if dn and not os.path.exists(dn):
5323 os.makedirs(dn)
5324 return True
5325 except OSError as err:
5326 if callable(to_screen):  # `callable()` returns a bool, which is never None
5327 to_screen('unable to create directory ' + error_to_compat_str(err))
5328 return False
5329
5330
5331 def get_executable_path():
5332 from .update import _get_variant_and_executable_path
5333
5334 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5335
5336
5337 def load_plugins(name, suffix, namespace):
5338 classes = {}
5339 with contextlib.suppress(FileNotFoundError):
5340 plugins_spec = importlib.util.spec_from_file_location(
5341 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5342 plugins = importlib.util.module_from_spec(plugins_spec)
5343 sys.modules[plugins_spec.name] = plugins
5344 plugins_spec.loader.exec_module(plugins)
5345 for name in dir(plugins):
5346 if name in namespace:
5347 continue
5348 if not name.endswith(suffix):
5349 continue
5350 klass = getattr(plugins, name)
5351 classes[name] = namespace[name] = klass
5352 return classes
5353
5354
5355 def traverse_obj(
5356 obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
5357 casesense=True, is_user_input=False, traverse_string=False):
5358 """
5359 Safely traverse nested `dict`s and `Sequence`s
5360
5361 >>> obj = [{}, {"key": "value"}]
5362 >>> traverse_obj(obj, (1, "key"))
5363 "value"
5364
5365 Each of the provided `paths` is tested and the first producing a valid result will be returned.
5366 The next path will also be tested if the path branched but no results could be found.
5367 Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
5368 A value of None is treated as the absence of a value.
5369
5370 The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
5371
5372 The keys in the path can be one of:
5373 - `None`: Return the current object.
5374 - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
5375 - `slice`: Branch out and return all values in `obj[key]`.
5376 - `Ellipsis`: Branch out and return a list of all values.
5377 - `tuple`/`list`: Branch out and return a list of all matching values.
5378 Read as: `[traverse_obj(obj, branch) for branch in branches]`.
5379 - `function`: Branch out and return values filtered by the function.
5380 Read as: `[value for key, value in obj if function(key, value)]`.
5381 For `Sequence`s, `key` is the index of the value.
5382 - `dict`: Transform the current object and return a matching dict.
5383 Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
5384
5385 `tuple`, `list`, and `dict` all support nested paths and branches.
5386
5387 @params paths Paths which to traverse by.
5388 @param default Value to return if the paths do not match.
5389 @param expected_type If a `type`, only accept final values of this type.
5390 If any other callable, try to call the function on each result.
5391 @param get_all If `False`, return the first matching result, otherwise all matching ones.
5392 @param casesense If `False`, consider string dictionary keys as case insensitive.
5393
5394 The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
5395
5396 @param is_user_input Whether the keys are generated from user input.
5397 If `True` strings get converted to `int`/`slice` if needed.
5398 @param traverse_string Whether to traverse into objects as strings.
5399 If `True`, any non-compatible object will first be
5400 converted into a string and then traversed into.
5401
5402
5403 @returns The result of the object traversal.
5404 If successful, `get_all=True`, and the path branches at least once,
5405 then a list of results is returned instead.
5406 A list is always returned if the last path branches and no `default` is given.
5407 """
5408 is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
5409 casefold = lambda k: k.casefold() if isinstance(k, str) else k
5410
5411 if isinstance(expected_type, type):
5412 type_test = lambda val: val if isinstance(val, expected_type) else None
5413 else:
5414 type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
5415
5416 def apply_key(key, obj):
5417 if obj is None:
5418 return
5419
5420 elif key is None:
5421 yield obj
5422
5423 elif isinstance(key, (list, tuple)):
5424 for branch in key:
5425 _, result = apply_path(obj, branch)
5426 yield from result
5427
5428 elif key is ...:
5429 if isinstance(obj, collections.abc.Mapping):
5430 yield from obj.values()
5431 elif is_sequence(obj):
5432 yield from obj
5433 elif isinstance(obj, re.Match):
5434 yield from obj.groups()
5435 elif traverse_string:
5436 yield from str(obj)
5437
5438 elif callable(key):
5439 if is_sequence(obj):
5440 iter_obj = enumerate(obj)
5441 elif isinstance(obj, collections.abc.Mapping):
5442 iter_obj = obj.items()
5443 elif isinstance(obj, re.Match):
5444 iter_obj = enumerate((obj.group(), *obj.groups()))
5445 elif traverse_string:
5446 iter_obj = enumerate(str(obj))
5447 else:
5448 return
5449 yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))
5450
5451 elif isinstance(key, dict):
5452 iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
5453 yield {k: v if v is not None else default for k, v in iter_obj
5454 if v is not None or default is not NO_DEFAULT}
5455
5456 elif isinstance(obj, collections.abc.Mapping):
5457 yield (obj.get(key) if casesense or (key in obj)
5458 else next((v for k, v in obj.items() if casefold(k) == key), None))
5459
5460 elif isinstance(obj, re.Match):
5461 if isinstance(key, int) or casesense:
5462 with contextlib.suppress(IndexError):
5463 yield obj.group(key)
5464 return
5465
5466 if not isinstance(key, str):
5467 return
5468
5469 yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
5470
5471 else:
5472 if is_user_input:
5473 key = (int_or_none(key) if ':' not in key
5474 else slice(*map(int_or_none, key.split(':'))))
5475
5476 if not isinstance(key, (int, slice)):
5477 return
5478
5479 if not is_sequence(obj):
5480 if not traverse_string:
5481 return
5482 obj = str(obj)
5483
5484 with contextlib.suppress(IndexError):
5485 yield obj[key]
5486
5487 def apply_path(start_obj, path):
5488 objs = (start_obj,)
5489 has_branched = False
5490
5491 for key in variadic(path):
5492 if is_user_input and key == ':':
5493 key = ...
5494
5495 if not casesense and isinstance(key, str):
5496 key = key.casefold()
5497
5498 if key is ... or isinstance(key, (list, tuple)) or callable(key):
5499 has_branched = True
5500
5501 key_func = functools.partial(apply_key, key)
5502 objs = itertools.chain.from_iterable(map(key_func, objs))
5503
5504 return has_branched, objs
5505
5506 def _traverse_obj(obj, path, use_list=True):
5507 has_branched, results = apply_path(obj, path)
5508 results = LazyList(x for x in map(type_test, results) if x is not None)
5509
5510 if get_all and has_branched:
5511 return results.exhaust() if results or use_list else None
5512
5513 return results[0] if results else None
5514
5515 for index, path in enumerate(paths, 1):
5516 use_list = default is NO_DEFAULT and index == len(paths)
5517 result = _traverse_obj(obj, path, use_list)
5518 if result is not None:
5519 return result
5520
5521 return None if default is NO_DEFAULT else default
5522
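# A few doctest-style illustrations of the path syntax documented above:
#   >>> obj = {'a': [{'b': 1}, {'b': 2}], 'C': 3}
#   >>> traverse_obj(obj, ('a', 0, 'b'))
#   1
#   >>> traverse_obj(obj, ('a', ..., 'b'))  # `...` branches over all items
#   [1, 2]
#   >>> traverse_obj(obj, 'c', casesense=False)
#   3
#   >>> traverse_obj(obj, {'first': ('a', 0, 'b'), 'all': ('a', ..., 'b')})
#   {'first': 1, 'all': [1, 2]}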
5523
5524 def traverse_dict(dictn, keys, casesense=True):
5525 deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
5526 f'in a future version. Use "{__name__}.traverse_obj" instead')
5527 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5528
5529
5530 def get_first(obj, keys, **kwargs):
5531 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5532
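# e.g. branch over a list of API responses and keep the first non-None value:
#   >>> get_first([{'id': None}, {'id': 42}], 'id')
#   42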
5533
5534 def time_seconds(**kwargs):
5535 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5536 return t.timestamp()
5537
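# Note: the timezone built from **kwargs only selects the clock to read; the
# returned Unix timestamp is timezone-independent, so e.g.
#   >>> abs(time_seconds(hours=9) - time.time()) < 1  # doctest: +SKIP
#   True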
5538
5539 # Create a JSON Web Signature (JWS) with HS256 algorithm;
5540 # the resulting format is JWS Compact Serialization.
5541 # Implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5542 # and JWS https://www.rfc-editor.org/rfc/rfc7515.html
5543 def jwt_encode_hs256(payload_data, key, headers={}):
5544 header_data = {
5545 'alg': 'HS256',
5546 'typ': 'JWT',
5547 }
5548 if headers:
5549 header_data.update(headers)
5550 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5551 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5552 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5553 signature_b64 = base64.b64encode(h.digest())
5554 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5555 return token
5556
5557
5558 # Can be extended in the future to verify the signature, parse the header and return the algorithm used if it's not HS256
5559 def jwt_decode_hs256(jwt):
5560 header_b64, payload_b64, signature_b64 = jwt.split('.')
5561 # add trailing ='s that may have been stripped, superfluous ='s are ignored
5562 payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
5563 return payload_data
5564
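# Round-trip sketch (doctest-style; as noted above, the signature is not verified):
#   >>> token = jwt_encode_hs256({'user': 'test'}, 'secret')
#   >>> jwt_decode_hs256(token.decode())
#   {'user': 'test'}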
5565
5566 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5567
5568
5569 @functools.cache
5570 def supports_terminal_sequences(stream):
5571 if compat_os_name == 'nt':
5572 if not WINDOWS_VT_MODE:
5573 return False
5574 elif not os.getenv('TERM'):
5575 return False
5576 try:
5577 return stream.isatty()
5578 except BaseException:
5579 return False
5580
5581
5582 def windows_enable_vt_mode():
5583 """Ref: https://bugs.python.org/issue30075 """
5584 if get_windows_version() < (10, 0, 10586):
5585 return
5586
5587 import ctypes
5588 import ctypes.wintypes
5589 import msvcrt
5590
5591 ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
5592
5593 dll = ctypes.WinDLL('kernel32', use_last_error=False)
5594 handle = os.open('CONOUT$', os.O_RDWR)
5595
5596 try:
5597 h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
5598 dw_original_mode = ctypes.wintypes.DWORD()
5599 success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
5600 if not success:
5601 raise Exception('GetConsoleMode failed')
5602
5603 success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
5604 dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
5605 if not success:
5606 raise Exception('SetConsoleMode failed')
5607 except Exception as e:
5608 write_string(f'WARNING: Cannot enable VT mode - {e}')
5609 else:
5610 global WINDOWS_VT_MODE
5611 WINDOWS_VT_MODE = True
5612 supports_terminal_sequences.cache_clear()
5613 finally:
5614 os.close(handle)
5615
5616
5617 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5618
5619
5620 def remove_terminal_sequences(string):
5621 return _terminal_sequences_re.sub('', string)
5622
5623
5624 def number_of_digits(number):
5625 return len('%d' % number)
5626
5627
5628 def join_nonempty(*values, delim='-', from_dict=None):
5629 if from_dict is not None:
5630 values = (traverse_obj(from_dict, variadic(v)) for v in values)
5631 return delim.join(map(str, filter(None, values)))
5632
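# e.g. falsy values are skipped and the rest stringified:
#   >>> join_nonempty('mp4', None, 1080, '', delim='-')
#   'mp4-1080'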
5633
5634 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5635 """
5636 Find the largest format dimensions in terms of video width and, for each thumbnail:
5637 * Modify the URL: Match the width with the provided regex and replace it with the largest format width
5638 * Update dimensions
5639
5640 This function is useful with video services that scale the provided thumbnails on demand
5641 """
5642 _keys = ('width', 'height')
5643 max_dimensions = max(
5644 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5645 default=(0, 0))
5646 if not max_dimensions[0]:
5647 return thumbnails
5648 return [
5649 merge_dicts(
5650 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5651 dict(zip(_keys, max_dimensions)), thumbnail)
5652 for thumbnail in thumbnails
5653 ]
5654
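# e.g. with a hypothetical service whose thumbnail width follows an underscore:
#   >>> scale_thumbnails_to_max_format_width(
#   ...     [{'width': 1280, 'height': 720}],
#   ...     [{'url': 'https://example.com/thumb_320.jpg'}], r'(?<=_)\d+')
#   [{'url': 'https://example.com/thumb_1280.jpg', 'width': 1280, 'height': 720}]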
5655
5656 def parse_http_range(range):
5657 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5658 if not range:
5659 return None, None, None
5660 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5661 if not crg:
5662 return None, None, None
5663 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5664
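# e.g.:
#   >>> parse_http_range('bytes 200-1023/2048')
#   (200, 1023, 2048)
#   >>> parse_http_range('bytes=500-')
#   (500, None, None)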
5665
5666 def read_stdin(what):
5667 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5668 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5669 return sys.stdin
5670
5671
5672 def determine_file_encoding(data):
5673 """
5674 Detect the text encoding used
5675 @returns (encoding, bytes to skip)
5676 """
5677
5678 # BOM marks are given priority over declarations
5679 for bom, enc in BOMS:
5680 if data.startswith(bom):
5681 return enc, len(bom)
5682
5683 # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5684 # We ignore the endianness to get a good enough match
5685 data = data.replace(b'\0', b'')
5686 mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5687 return mobj.group(1).decode() if mobj else None, 0
5688
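# e.g. a BOM takes priority over an in-file coding declaration (assuming the
# module-level BOMS table, which maps b'\xef\xbb\xbf' to 'utf-8'):
#   >>> determine_file_encoding(b'\xef\xbb\xbf# coding: latin-1\n')  # doctest: +SKIP
#   ('utf-8', 3)
#   >>> determine_file_encoding(b'# coding: utf-8\n')  # doctest: +SKIP
#   ('utf-8', 0)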
5689
5690 class Config:
5691 own_args = None
5692 parsed_args = None
5693 filename = None
5694 __initialized = False
5695
5696 def __init__(self, parser, label=None):
5697 self.parser, self.label = parser, label
5698 self._loaded_paths, self.configs = set(), []
5699
5700 def init(self, args=None, filename=None):
5701 assert not self.__initialized
5702 self.own_args, self.filename = args, filename
5703 return self.load_configs()
5704
5705 def load_configs(self):
5706 directory = ''
5707 if self.filename:
5708 location = os.path.realpath(self.filename)
5709 directory = os.path.dirname(location)
5710 if location in self._loaded_paths:
5711 return False
5712 self._loaded_paths.add(location)
5713
5714 self.__initialized = True
5715 opts, _ = self.parser.parse_known_args(self.own_args)
5716 self.parsed_args = self.own_args
5717 for location in opts.config_locations or []:
5718 if location == '-':
5719 if location in self._loaded_paths:
5720 continue
5721 self._loaded_paths.add(location)
5722 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5723 continue
5724 location = os.path.join(directory, expand_path(location))
5725 if os.path.isdir(location):
5726 location = os.path.join(location, 'yt-dlp.conf')
5727 if not os.path.exists(location):
5728 self.parser.error(f'config location {location} does not exist')
5729 self.append_config(self.read_file(location), location)
5730 return True
5731
5732 def __str__(self):
5733 label = join_nonempty(
5734 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5735 delim=' ')
5736 return join_nonempty(
5737 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5738 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5739 delim='\n')
5740
5741 @staticmethod
5742 def read_file(filename, default=[]):
5743 try:
5744 optionf = open(filename, 'rb')
5745 except OSError:
5746 return default # silently skip if file is not present
5747 try:
5748 enc, skip = determine_file_encoding(optionf.read(512))
5749 optionf.seek(skip, io.SEEK_SET)
5750 except OSError:
5751 enc = None # silently skip read errors
5752 try:
5753 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5754 contents = optionf.read().decode(enc or preferredencoding())
5755 res = shlex.split(contents, comments=True)
5756 except Exception as err:
5757 raise ValueError(f'Unable to parse "{filename}": {err}')
5758 finally:
5759 optionf.close()
5760 return res
5761
5762 @staticmethod
5763 def hide_login_info(opts):
5764 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5765 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5766
5767 def _scrub_eq(o):
5768 m = eqre.match(o)
5769 if m:
5770 return m.group('key') + '=PRIVATE'
5771 else:
5772 return o
5773
5774 opts = list(map(_scrub_eq, opts))
5775 for idx, opt in enumerate(opts):
5776 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5777 opts[idx + 1] = 'PRIVATE'
5778 return opts
5779
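# e.g. both the `--opt value` and `--opt=value` spellings are scrubbed:
#   >>> Config.hide_login_info(['-u', 'foo', '--password=bar', '-v'])
#   ['-u', 'PRIVATE', '--password=PRIVATE', '-v']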
5780 def append_config(self, *args, label=None):
5781 config = type(self)(self.parser, label)
5782 config._loaded_paths = self._loaded_paths
5783 if config.init(*args):
5784 self.configs.append(config)
5785
5786 @property
5787 def all_args(self):
5788 for config in reversed(self.configs):
5789 yield from config.all_args
5790 yield from self.parsed_args or []
5791
5792 def parse_known_args(self, **kwargs):
5793 return self.parser.parse_known_args(self.all_args, **kwargs)
5794
5795 def parse_args(self):
5796 return self.parser.parse_args(self.all_args)
5797
5798
5799 class WebSocketsWrapper:
5800 """Wraps websockets module to use in non-async scopes"""
5801 pool = None
5802
5803 def __init__(self, url, headers=None, connect=True):
5804 self.loop = asyncio.new_event_loop()
5805 # XXX: "loop" is deprecated
5806 self.conn = websockets.connect(
5807 url, extra_headers=headers, ping_interval=None,
5808 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5809 if connect:
5810 self.__enter__()
5811 atexit.register(self.__exit__, None, None, None)
5812
5813 def __enter__(self):
5814 if not self.pool:
5815 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5816 return self
5817
5818 def send(self, *args):
5819 self.run_with_loop(self.pool.send(*args), self.loop)
5820
5821 def recv(self, *args):
5822 return self.run_with_loop(self.pool.recv(*args), self.loop)
5823
5824 def __exit__(self, type, value, traceback):
5825 try:
5826 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5827 finally:
5828 self._cancel_all_tasks(self.loop)  # must run before the loop is closed
5829 self.loop.close()
5830
5831 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5832 # for contributors: If any new library using asyncio needs to be run in non-async code, move these functions out of this class
5833 @staticmethod
5834 def run_with_loop(main, loop):
5835 if not asyncio.iscoroutine(main):
5836 raise ValueError(f'a coroutine was expected, got {main!r}')
5837
5838 try:
5839 return loop.run_until_complete(main)
5840 finally:
5841 loop.run_until_complete(loop.shutdown_asyncgens())
5842 if hasattr(loop, 'shutdown_default_executor'):
5843 loop.run_until_complete(loop.shutdown_default_executor())
5844
5845 @staticmethod
5846 def _cancel_all_tasks(loop):
5847 to_cancel = asyncio.all_tasks(loop)
5848
5849 if not to_cancel:
5850 return
5851
5852 for task in to_cancel:
5853 task.cancel()
5854
5855 # XXX: "loop" is removed in python 3.10+
5856 loop.run_until_complete(
5857 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5858
5859 for task in to_cancel:
5860 if task.cancelled():
5861 continue
5862 if task.exception() is not None:
5863 loop.call_exception_handler({
5864 'message': 'unhandled exception during asyncio.run() shutdown',
5865 'exception': task.exception(),
5866 'task': task,
5867 })
5868
5869
5870 def merge_headers(*dicts):
5871 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5872 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5873
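# e.g.:
#   >>> merge_headers({'accept': '*/*', 'x-forwarded-for': 'a'}, {'Accept': 'text/html'})
#   {'Accept': 'text/html', 'X-Forwarded-For': 'a'}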
5874
5875 def cached_method(f):
5876 """Cache a method"""
5877 signature = inspect.signature(f)
5878
5879 @functools.wraps(f)
5880 def wrapper(self, *args, **kwargs):
5881 bound_args = signature.bind(self, *args, **kwargs)
5882 bound_args.apply_defaults()
5883 key = tuple(bound_args.arguments.values())[1:]
5884
5885 cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
5886 if key not in cache:
5887 cache[key] = f(self, *args, **kwargs)
5888 return cache[key]
5889 return wrapper
5890
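# Minimal illustration with a hypothetical class:
#   >>> class Fetcher:
#   ...     @cached_method
#   ...     def fetch(self, url):
#   ...         print('fetching', url)
#   ...         return url.upper()
#   >>> f = Fetcher()
#   >>> f.fetch('a')
#   fetching a
#   'A'
#   >>> f.fetch('a')  # served from the per-instance cache, so no print
#   'A'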
5891
5892 class classproperty:
5893 """property access for class methods with optional caching"""
5894 def __new__(cls, func=None, *args, **kwargs):
5895 if not func:
5896 return functools.partial(cls, *args, **kwargs)
5897 return super().__new__(cls)
5898
5899 def __init__(self, func, *, cache=False):
5900 functools.update_wrapper(self, func)
5901 self.func = func
5902 self._cache = {} if cache else None
5903
5904 def __get__(self, _, cls):
5905 if self._cache is None:
5906 return self.func(cls)
5907 elif cls not in self._cache:
5908 self._cache[cls] = self.func(cls)
5909 return self._cache[cls]
5910
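# e.g. (hypothetical class):
#   >>> class MyIE:
#   ...     @classproperty(cache=True)
#   ...     def ie_key(cls):
#   ...         return cls.__name__.lower()
#   >>> MyIE.ie_key  # computed once per class, then served from the cache
#   'myie'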
5911
5912 class Namespace(types.SimpleNamespace):
5913 """Immutable namespace"""
5914
5915 def __iter__(self):
5916 return iter(self.__dict__.values())
5917
5918 @property
5919 def items_(self):
5920 return self.__dict__.items()
5921
5922
5923 MEDIA_EXTENSIONS = Namespace(
5924 common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5925 video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5926 common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5927 audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
5928 thumbnails=('jpg', 'png', 'webp'),
5929 storyboards=('mhtml', ),
5930 subtitles=('srt', 'vtt', 'ass', 'lrc'),
5931 manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5932 )
5933 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5934 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5935
5936 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5937
5938
5939 class RetryManager:
5940 """Usage:
5941 for retry in RetryManager(...):
5942 try:
5943 ...
5944 except SomeException as err:
5945 retry.error = err
5946 continue
5947 """
5948 attempt, _error = 0, None
5949
5950 def __init__(self, _retries, _error_callback, **kwargs):
5951 self.retries = _retries or 0
5952 self.error_callback = functools.partial(_error_callback, **kwargs)
5953
5954 def _should_retry(self):
5955 return self._error is not NO_DEFAULT and self.attempt <= self.retries
5956
5957 @property
5958 def error(self):
5959 if self._error is NO_DEFAULT:
5960 return None
5961 return self._error
5962
5963 @error.setter
5964 def error(self, value):
5965 self._error = value
5966
5967 def __iter__(self):
5968 while self._should_retry():
5969 self.error = NO_DEFAULT
5970 self.attempt += 1
5971 yield self
5972 if self.error:
5973 self.error_callback(self.error, self.attempt, self.retries)
5974
5975 @staticmethod
5976 def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5977 """Utility function for reporting retries"""
5978 if count > retries:
5979 if error:
5980 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5981 raise e
5982
5983 if not count:
5984 return warn(e)
5985 elif isinstance(e, ExtractorError):
5986 e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
5987 warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5988
5989 delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5990 if delay:
5991 info(f'Sleeping {delay:.2f} seconds ...')
5992 time.sleep(delay)
5993
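# Sketch of the full pattern with a hypothetical error callback:
#   >>> def _report(err, count, retries):
#   ...     print(f'attempt {count}/{retries}: {err}')
#   >>> for retry in RetryManager(2, _report):
#   ...     try:
#   ...         raise OSError('boom')
#   ...     except OSError as err:
#   ...         retry.error = err
#   attempt 1/2: boom
#   attempt 2/2: boom
#   attempt 3/2: boom
# i.e. `retries` counts the *re*-tries, so three attempts are made in total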
5994
5995 def make_archive_id(ie, video_id):
5996 ie_key = ie if isinstance(ie, str) else ie.ie_key()
5997 return f'{ie_key.lower()} {video_id}'
5998
5999
6000 def truncate_string(s, left, right=0):
6001 assert left > 3 and right >= 0
6002 if s is None or len(s) <= left + right:
6003 return s
6004 return f'{s[:left-3]}...{s[-right:] if right else ""}'
6005
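# e.g.:
#   >>> truncate_string('abcdefghijklmnop', 10, 3)
#   'abcdefg...nop'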
6006
6007 def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
6008 assert 'all' in alias_dict, '"all" alias is required'
6009 requested = list(start or [])
6010 for val in options:
6011 discard = val.startswith('-')
6012 if discard:
6013 val = val[1:]
6014
6015 if val in alias_dict:
6016 val = alias_dict[val] if not discard else [
6017 i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
6018 # NB: Do not allow regex in aliases for performance
6019 requested = orderedSet_from_options(val, alias_dict, start=requested)
6020 continue
6021
6022 current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
6023 else [val] if val in alias_dict['all'] else None)
6024 if current is None:
6025 raise ValueError(val)
6026
6027 if discard:
6028 for item in current:
6029 while item in requested:
6030 requested.remove(item)
6031 else:
6032 requested.extend(current)
6033
6034 return orderedSet(requested)
6035
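# e.g. with a hypothetical alias table; '-'-prefixed values are discarded:
#   >>> aliases = {'all': ['a', 'b', 'c'], 'most': ['a', 'b']}
#   >>> orderedSet_from_options(['most', '-a', 'c'], aliases)
#   ['b', 'c']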
6036
6037 class FormatSorter:
6038 regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
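# The sort-string syntax captured by the regex above, illustratively:
#   'res'        plain field
#   '+size'      reverse: prefer the smallest value
#   'res:1080'   limit: prefer values up to (and including) the limit
#   'br~1000'    closest: prefer the value nearest to the limit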
6039
6040 default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
6041 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
6042 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
6043 ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
6044 'height', 'width', 'proto', 'vext', 'abr', 'aext',
6045 'fps', 'fs_approx', 'source', 'id')
6046
6047 settings = {
6048 'vcodec': {'type': 'ordered', 'regex': True,
6049 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
6050 'acodec': {'type': 'ordered', 'regex': True,
6051 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
6052 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
6053 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
6054 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
6055 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
6056 'vext': {'type': 'ordered', 'field': 'video_ext',
6057 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
6058 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
6059 'aext': {'type': 'ordered', 'field': 'audio_ext',
6060 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
6061 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')},
6062 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
6063 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
6064 'field': ('vcodec', 'acodec'),
6065 'function': lambda it: int(any(v != 'none' for v in it))},
6066 'ie_pref': {'priority': True, 'type': 'extractor'},
6067 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
6068 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
6069 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
6070 'quality': {'convert': 'float', 'default': -1},
6071 'filesize': {'convert': 'bytes'},
6072 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
6073 'id': {'convert': 'string', 'field': 'format_id'},
6074 'height': {'convert': 'float_none'},
6075 'width': {'convert': 'float_none'},
6076 'fps': {'convert': 'float_none'},
6077 'channels': {'convert': 'float_none', 'field': 'audio_channels'},
6078 'tbr': {'convert': 'float_none'},
6079 'vbr': {'convert': 'float_none'},
6080 'abr': {'convert': 'float_none'},
6081 'asr': {'convert': 'float_none'},
6082 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
6083
6084 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
6085 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
6086 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
6087 'ext': {'type': 'combined', 'field': ('vext', 'aext')},
6088 'res': {'type': 'multiple', 'field': ('height', 'width'),
6089 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
6090
6091 # Actual field names
6092 'format_id': {'type': 'alias', 'field': 'id'},
6093 'preference': {'type': 'alias', 'field': 'ie_pref'},
6094 'language_preference': {'type': 'alias', 'field': 'lang'},
6095 'source_preference': {'type': 'alias', 'field': 'source'},
6096 'protocol': {'type': 'alias', 'field': 'proto'},
6097 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
6098 'audio_channels': {'type': 'alias', 'field': 'channels'},
6099
6100 # Deprecated
6101 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
6102 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
6103 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
6104 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
6105 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
6106 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
6107 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
6108 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
6109 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
6110 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
6111 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
6112 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
6113 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
6114 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
6115 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
6116 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
6117 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
6118 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
6119 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
6120 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
6121 }
6122
6123 def __init__(self, ydl, field_preference):
6124 self.ydl = ydl
6125 self._order = []
6126 self.evaluate_params(self.ydl.params, field_preference)
6127 if ydl.params.get('verbose'):
6128 self.print_verbose_info(self.ydl.write_debug)
6129
6130 def _get_field_setting(self, field, key):
6131 if field not in self.settings:
6132 if key in ('forced', 'priority'):
6133 return False
6134 self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
6135 'deprecated and may be removed in a future version')
6136 self.settings[field] = {}
6137 propObj = self.settings[field]
6138 if key not in propObj:
6139 type = propObj.get('type')
6140 if key == 'field':
6141 default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
6142 elif key == 'convert':
6143 default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
6144 else:
6145 default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
6146 propObj[key] = default
6147 return propObj[key]
6148
6149 def _resolve_field_value(self, field, value, convertNone=False):
6150 if value is None:
6151 if not convertNone:
6152 return None
6153 else:
6154 value = value.lower()
6155 conversion = self._get_field_setting(field, 'convert')
6156 if conversion == 'ignore':
6157 return None
6158 if conversion == 'string':
6159 return value
6160 elif conversion == 'float_none':
6161 return float_or_none(value)
6162 elif conversion == 'bytes':
6163 return parse_bytes(value)
6164 elif conversion == 'order':
6165 order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
6166 use_regex = self._get_field_setting(field, 'regex')
6167 list_length = len(order_list)
6168 empty_pos = order_list.index('') if '' in order_list else list_length + 1
6169 if use_regex and value is not None:
6170 for i, regex in enumerate(order_list):
6171 if regex and re.match(regex, value):
6172 return list_length - i
6173 return list_length - empty_pos # not in list
6174 else: # not regex or value = None
6175 return list_length - (order_list.index(value) if value in order_list else empty_pos)
6176 else:
6177 if value.isnumeric():
6178 return float(value)
6179 else:
6180 self.settings[field]['convert'] = 'string'
6181 return value
6182
6183 def evaluate_params(self, params, sort_extractor):
6184 self._use_free_order = params.get('prefer_free_formats', False)
6185 self._sort_user = params.get('format_sort', [])
6186 self._sort_extractor = sort_extractor
6187
6188 def add_item(field, reverse, closest, limit_text):
6189 field = field.lower()
6190 if field in self._order:
6191 return
6192 self._order.append(field)
6193 limit = self._resolve_field_value(field, limit_text)
6194 data = {
6195 'reverse': reverse,
6196 'closest': False if limit is None else closest,
6197 'limit_text': limit_text,
6198 'limit': limit}
6199 if field in self.settings:
6200 self.settings[field].update(data)
6201 else:
6202 self.settings[field] = data
6203
6204 sort_list = (
6205 tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
6206 + (tuple() if params.get('format_sort_force', False)
6207 else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
6208 + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
6209
6210 for item in sort_list:
6211 match = re.match(self.regex, item)
6212 if match is None:
6213 raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
6214 field = match.group('field')
6215 if field is None:
6216 continue
6217 if self._get_field_setting(field, 'type') == 'alias':
6218 alias, field = field, self._get_field_setting(field, 'field')
6219 if self._get_field_setting(alias, 'deprecated'):
6220 self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
6221 f'be removed in a future version. Please use {field} instead')
6222 reverse = match.group('reverse') is not None
6223 closest = match.group('separator') == '~'
6224 limit_text = match.group('limit')
6225
6226 has_limit = limit_text is not None
6227 has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
6228 has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
6229
6230 fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
6231 limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
6232 limit_count = len(limits)
6233 for (i, f) in enumerate(fields):
6234 add_item(f, reverse, closest,
6235 limits[i] if i < limit_count
6236 else limits[0] if has_limit and not has_multiple_limits
6237 else None)
6238
6239 def print_verbose_info(self, write_debug):
6240 if self._sort_user:
6241 write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
6242 if self._sort_extractor:
6243 write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
6244 write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
6245 '+' if self._get_field_setting(field, 'reverse') else '', field,
6246 '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
6247 self._get_field_setting(field, 'limit_text'),
6248 self._get_field_setting(field, 'limit'))
6249 if self._get_field_setting(field, 'limit_text') is not None else '')
6250 for field in self._order if self._get_field_setting(field, 'visible')]))
6251
6252 def _calculate_field_preference_from_value(self, format, field, type, value):
6253 reverse = self._get_field_setting(field, 'reverse')
6254 closest = self._get_field_setting(field, 'closest')
6255 limit = self._get_field_setting(field, 'limit')
6256
6257 if type == 'extractor':
6258 maximum = self._get_field_setting(field, 'max')
6259 if value is None or (maximum is not None and value >= maximum):
6260 value = -1
6261 elif type == 'boolean':
6262 in_list = self._get_field_setting(field, 'in_list')
6263 not_in_list = self._get_field_setting(field, 'not_in_list')
6264 value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
6265 elif type == 'ordered':
6266 value = self._resolve_field_value(field, value, True)
6267
6268 # try to convert to number
6269 val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
6270 is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
6271 if is_num:
6272 value = val_num
6273
6274 return ((-10, 0) if value is None
6275 else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
6276 else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
6277 else (0, value, 0) if not reverse and (limit is None or value <= limit)
6278 else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
6279 else (-1, value, 0))
6280
6281 def _calculate_field_preference(self, format, field):
6282 type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
6283 get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
6284 if type == 'multiple':
6285 type = 'field' # Only 'field' is allowed in multiple for now
6286 actual_fields = self._get_field_setting(field, 'field')
6287
6288 value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
6289 else:
6290 value = get_value(field)
6291 return self._calculate_field_preference_from_value(format, field, type, value)
6292
6293 def calculate_preference(self, format):
6294 # Determine missing protocol
6295 if not format.get('protocol'):
6296 format['protocol'] = determine_protocol(format)
6297
6298 # Determine missing ext
6299 if not format.get('ext') and 'url' in format:
6300 format['ext'] = determine_ext(format['url'])
6301 if format.get('vcodec') == 'none':
6302 format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
6303 format['video_ext'] = 'none'
6304 else:
6305 format['video_ext'] = format['ext']
6306 format['audio_ext'] = 'none'
6307 # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
6308 # format['preference'] = -1000
6309
6310 # Determine missing bitrates
6311 if format.get('tbr') is None:
6312 if format.get('vbr') is not None and format.get('abr') is not None:
6313 format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
6314 else:
6315 if format.get('vcodec') != 'none' and format.get('vbr') is None:
6316 format['vbr'] = format.get('tbr') - format.get('abr', 0)
6317 if format.get('acodec') != 'none' and format.get('abr') is None:
6318 format['abr'] = format.get('tbr') - format.get('vbr', 0)
6319
6320 return tuple(self._calculate_field_preference(format, field) for field in self._order)
6321
6322
6323 # Deprecated
6324 has_certifi = bool(certifi)
6325 has_websockets = bool(websockets)