import asyncio
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import collections.abc
import contextlib
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import importlib.util
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
import zlib

from .compat import functools  # isort: split
from .compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
)
from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in urllib.parse.uses_netloc:
            urllib.parse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
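# A quick illustration of xpath_with_ns (the MRSS namespace URI below is just
# an example value, not something this module depends on):
#   xpath_with_ns('media:thumbnail', {'media': 'http://search.yahoo.com/mrss/'})
#   == '{http://search.yahoo.com/mrss/}thumbnail'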


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, str):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]
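# A minimal sketch of how the xpath_* helpers compose (the element names are
# made up purely for illustration):
#   doc = compat_etree_fromstring('<root><media url="http://a/v.mp4">clip</media></root>')
#   xpath_text(doc, './media')                  == 'clip'
#   xpath_attr(doc, './media', 'url')           == 'http://a/v.mp4'
#   xpath_text(doc, './missing', default=None) is None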


def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """
    if not value:
        return

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )
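# For instance, with a made-up snippet, to show the shape of the output:
#   list(get_elements_text_and_html_by_attribute(
#       'data-id', '42', '<span data-id="42">hi</span>'))
#   == [('hi', '<span data-id="42">hi</span>')]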


class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')
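# A small worked example (input is illustrative):
#   get_element_text_and_html_by_tag('b', '<p><b>bold</b></p>')
#   == ('bold', '<b>bold</b>')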


class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        html.parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
        raise compat_HTMLParseError('done')


class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries with their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
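# A quick sanity check of the behaviour above (input is illustrative):
#   clean_html('<p>Line one<br>Line two</p>')  == 'Line one\nLine two'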


class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        try:
            if self.ignore_extra:
                return self.raw_decode(s.lstrip())[0]
            return super().decode(s)
        except json.JSONDecodeError as e:
            if e.pos is not None:
                raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
            raise
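# Illustrative use, passing the decoder kwargs through json.loads:
#   json.loads('{"ok": true} trailing garbage',
#              cls=LenientJSONDecoder, ignore_extra=True)  == {'ok': True}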


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
            with contextlib.suppress(io.UnsupportedOperation):
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
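# For example, with a classic RFC 2822 date (the epoch value was computed by
# hand, so treat it as a sketch rather than a test vector):
#   timeconvert('Mon, 20 Nov 1995 19:12:08 -0500')  == 816912728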


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
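# An illustrative input (restricted mode maps separators to '_'/'-', while the
# default mode substitutes full-width look-alike characters instead):
#   sanitize_filename('A/B: C', restricted=True)  == 'A_B_-_C'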


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url, *, scheme='http'):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return f'{scheme}:{url}'
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
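# Examples with made-up URLs:
#   sanitize_url('//cdn.example.com/v.mp4')  == 'http://cdn.example.com/v.mp4'
#   sanitize_url('httpss://example.com/x')   == 'https://example.com/x'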


def extract_basic_auth(url):
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = urllib.parse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
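# For example (the credentials here are obviously fake):
#   extract_basic_auth('http://user:pass@example.com/x')
#   == ('http://example.com/x', 'Basic dXNlcjpwYXNz')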


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return urllib.request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _iter():
        seen = []  # Do not use set since the items can be unhashable
        for x in iterable:
            if x not in seen:
                seen.append(x)
                yield x

    return _iter() if lazy else list(_iter())
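# Order-preserving deduplication, e.g.:
#   orderedSet([1, 2, 1, 3, 2])  == [1, 2, 3]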


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon.
    # E.g. '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
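# A few representative entities (named, decimal and hexadecimal):
#   unescapeHTML('&lt;b&gt; &amp; &#39; &#x27;')  == "<b> & ' '"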


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
                        f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    @staticmethod
    def _fix_pyinstaller_ld_path(env):
        """Restore LD_LIBRARY_PATH when using PyInstaller
        Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
             https://github.com/yt-dlp/yt-dlp/issues/4573
        """
        if not hasattr(sys, '_MEIPASS'):
            return

        def _fix(key):
            orig = env.get(f'{key}_ORIG')
            if orig is None:
                env.pop(key, None)
            else:
                env[key] = orig

        _fix('LD_LIBRARY_PATH')  # Linux
        _fix('DYLD_LIBRARY_PATH')  # macOS

    def __init__(self, *args, env=None, text=False, **kwargs):
        if env is None:
            env = os.environ.copy()
        self._fix_pyinstaller_ld_path(env)

        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, timeout=None, **kwargs):
        with cls(*args, **kwargs) as proc:
            default = '' if proc.text_mode else b''
            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
            return stdout or default, stderr or default, proc.returncode
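# Typical call pattern; `ffmpeg` is merely an illustrative executable:
#   stdout, stderr, returncode = Popen.run(
#       ['ffmpeg', '-version'], text=True,
#       stdout=subprocess.PIPE, stderr=subprocess.PIPE)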


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
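# For instance:
#   formatSeconds(3723)              == '1:02:03'
#   formatSeconds(3723.5, msec=True) == '1:02:03.500'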


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')
    elif (
        sys.version_info < (3, 10)
        and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
        and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
    ):
        # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
        # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
        # in some situations [2][3].
        # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
        # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
        # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
        # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
        # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
        # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
        # 4. https://peps.python.org/pep-0644/
        # 5. https://peps.python.org/pep-0644/#libressl-support
        # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
        context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
        context.minimum_version = ssl.TLSVersion.TLSv1_2

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    from .update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info

        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg or 'The channel is not currently live', **kwargs)


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers
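# The marker header strips any Accept-Encoding key and is itself removed, e.g.:
#   handle_youtubedl_headers({'User-Agent': 'UA', 'Accept-Encoding': 'gzip',
#                             'Youtubedl-no-compression': '1'})
#   == {'User-Agent': 'UA'}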


class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = http.client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = urllib.request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, http.client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or http.client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


def is_path_like(f):
    return isinstance(f, (str, bytes, os.PathLike))
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
1669
1670 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
1671 def __init__(self, cookiejar=None):
1672 urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
1673
1674 def http_response(self, request, response):
1675 return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
1676
1677 https_request = urllib.request.HTTPCookieProcessor.http_request
1678 https_response = http_response
1679
1680
1681 class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
1682 """YoutubeDL redirect handler
1683
1684 The code is based on HTTPRedirectHandler implementation from CPython [1].
1685
1686 This redirect handler solves two issues:
1687 - ensures redirect URL is always unicode under python 2
1688 - introduces support for experimental HTTP response status code
1689 308 Permanent Redirect [2] used by some sites [3]
1690
1691 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1692 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1693 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1694 """
1695
1696 http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
1697
1698 def redirect_request(self, req, fp, code, msg, headers, newurl):
1699 """Return a Request or None in response to a redirect.
1700
1701 This is called by the http_error_30x methods when a
1702 redirection response is received. If a redirection should
1703 take place, return a new Request to allow http_error_30x to
1704 perform the redirect. Otherwise, raise HTTPError if no-one
1705 else should try to handle this url. Return None if you can't
1706 but another Handler might.
1707 """
1708 m = req.get_method()
1709 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1710 or code in (301, 302, 303) and m == "POST")):
1711 raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
1712 # Strictly (according to RFC 2616), 301 or 302 in response to
1713 # a POST MUST NOT cause a redirection without confirmation
1714 # from the user (of urllib.request, in this case). In practice,
1715 # essentially all clients do redirect in this case, so we do
1716 # the same.
1717
1718 # Be conciliant with URIs containing a space. This is mainly
1719 # redundant with the more complete encoding done in http_error_302(),
1720 # but it is kept for compatibility with other callers.
1721 newurl = newurl.replace(' ', '%20')
1722
1723 CONTENT_HEADERS = ("content-length", "content-type")
1724 # Strip entity headers, since the request method and body may change on redirect
1725 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1726
1727 # A 303 must either use GET or HEAD for subsequent request
1728 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1729 if code == 303 and m != 'HEAD':
1730 m = 'GET'
1731 # 301 and 302 redirects are commonly turned into a GET from a POST
1732 # for subsequent requests by browsers, so we'll do the same.
1733 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1734 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1735 if code in (301, 302) and m == 'POST':
1736 m = 'GET'
1737
1738 return urllib.request.Request(
1739 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1740 unverifiable=True, method=m)
1741
1742
1743 def extract_timezone(date_str):
1744 m = re.search(
1745 r'''(?x)
1746 ^.{8,}? # >=8 char non-TZ prefix, if present
1747 (?P<tz>Z| # just the UTC Z, or
1748 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1749 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1750 [ ]? # optional space
1751 (?P<sign>\+|-) # +/-
1752 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1753 $)
1754 ''', date_str)
1755 if not m:
1756 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1757 timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
1758 if timezone is not None:
1759 date_str = date_str[:-len(m.group('tz'))]
1760 timezone = datetime.timedelta(hours=timezone or 0)
1761 else:
1762 date_str = date_str[:-len(m.group('tz'))]
1763 if not m.group('sign'):
1764 timezone = datetime.timedelta()
1765 else:
1766 sign = 1 if m.group('sign') == '+' else -1
1767 timezone = datetime.timedelta(
1768 hours=sign * int(m.group('hours')),
1769 minutes=sign * int(m.group('minutes')))
1770 return timezone, date_str
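# Illustrative example (doctest-style sketch; hand-computed, hypothetical input):
#   >>> extract_timezone('2021-01-01T12:00:00+0530')
#   (datetime.timedelta(seconds=19800), '2021-01-01T12:00:00')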
1771
1772
1773 def parse_iso8601(date_str, delimiter='T', timezone=None):
1774 """ Return a UNIX timestamp from the given date """
1775
1776 if date_str is None:
1777 return None
1778
1779 date_str = re.sub(r'\.[0-9]+', '', date_str)
1780
1781 if timezone is None:
1782 timezone, date_str = extract_timezone(date_str)
1783
1784 with contextlib.suppress(ValueError):
1785 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1786 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1787 return calendar.timegm(dt.timetuple())
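# Illustrative examples (doctest-style sketch; values hand-computed in UTC):
#   >>> parse_iso8601('2014-12-31T18:36:53Z')
#   1420051013
#   >>> parse_iso8601('2014-12-31T19:36:53+01:00')  # the offset is subtracted
#   1420051013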
1788
1789
1790 def date_formats(day_first=True):
1791 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1792
1793
1794 def unified_strdate(date_str, day_first=True):
1795 """Return a string with the date in the format YYYYMMDD"""
1796
1797 if date_str is None:
1798 return None
1799 upload_date = None
1800 # Replace commas
1801 date_str = date_str.replace(',', ' ')
1802 # Remove AM/PM + timezone
1803 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1804 _, date_str = extract_timezone(date_str)
1805
1806 for expression in date_formats(day_first):
1807 with contextlib.suppress(ValueError):
1808 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1809 if upload_date is None:
1810 timetuple = email.utils.parsedate_tz(date_str)
1811 if timetuple:
1812 with contextlib.suppress(ValueError):
1813 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1814 if upload_date is not None:
1815 return str(upload_date)
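# Illustrative examples (doctest-style sketch):
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('28/01/2014 21:00:00 +0100')
#   '20140128'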
1816
1817
1818 def unified_timestamp(date_str, day_first=True):
1819 if date_str is None:
1820 return None
1821
1822 date_str = re.sub(r'\s+', ' ', re.sub(
1823 r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
1824
1825 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1826 timezone, date_str = extract_timezone(date_str)
1827
1828 # Remove AM/PM + timezone
1829 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1830
1831 # Remove unrecognized timezones from ISO 8601 alike timestamps
1832 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1833 if m:
1834 date_str = date_str[:-len(m.group('tz'))]
1835
1836 # Python only supports microseconds, so remove nanoseconds
1837 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1838 if m:
1839 date_str = m.group(1)
1840
1841 for expression in date_formats(day_first):
1842 with contextlib.suppress(ValueError):
1843 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1844 return calendar.timegm(dt.timetuple())
1845
1846 timetuple = email.utils.parsedate_tz(date_str)
1847 if timetuple:
1848 return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
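# Illustrative example (doctest-style sketch; value hand-computed in UTC):
#   >>> unified_timestamp('2017-12-15T07:49:00Z')
#   1513324140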
1849
1850
1851 def determine_ext(url, default_ext='unknown_video'):
1852 if url is None or '.' not in url:
1853 return default_ext
1854 guess = url.partition('?')[0].rpartition('.')[2]
1855 if re.match(r'^[A-Za-z0-9]+$', guess):
1856 return guess
1857 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1858 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1859 return guess.rstrip('/')
1860 else:
1861 return default_ext
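# Illustrative examples (doctest-style sketch; hypothetical URLs):
#   >>> determine_ext('https://example.com/video.mp4?dl=1')
#   'mp4'
#   >>> determine_ext('https://example.com/media/bar.mp4/?download')
#   'mp4'
#   >>> determine_ext('https://example.com/path/')
#   'unknown_video'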
1862
1863
1864 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1865 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1866
1867
1868 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1869 R"""
1870 Return a datetime object from a string.
1871 Supported format:
1872 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1873
1874 @param format strftime format of DATE
1875 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1876 auto: round to the unit provided in date_str (if applicable).
1877 """
1878 auto_precision = False
1879 if precision == 'auto':
1880 auto_precision = True
1881 precision = 'microsecond'
1882 today = datetime_round(datetime.datetime.utcnow(), precision)
1883 if date_str in ('now', 'today'):
1884 return today
1885 if date_str == 'yesterday':
1886 return today - datetime.timedelta(days=1)
1887 match = re.match(
1888 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1889 date_str)
1890 if match is not None:
1891 start_time = datetime_from_str(match.group('start'), precision, format)
1892 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1893 unit = match.group('unit')
1894 if unit in ('month', 'year'):
1895 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1896 unit = 'day'
1897 else:
1898 if unit == 'week':
1899 unit = 'day'
1900 time *= 7
1901 delta = datetime.timedelta(**{unit + 's': time})
1902 new_date = start_time + delta
1903 if auto_precision:
1904 return datetime_round(new_date, unit)
1905 return new_date
1906
1907 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
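# Illustrative example (doctest-style sketch). Relative forms such as
# 'now-1day' resolve against the current UTC time, so only the absolute
# form below has a deterministic result:
#   >>> datetime_from_str('20200229-1month')
#   datetime.datetime(2020, 1, 29, 0, 0)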
1908
1909
1910 def date_from_str(date_str, format='%Y%m%d', strict=False):
1911 R"""
1912 Return a date object from a string using datetime_from_str
1913
1914 @param strict Restrict allowed patterns to "YYYYMMDD" and
1915 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1916 """
1917 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1918 raise ValueError(f'Invalid date format "{date_str}"')
1919 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1920
1921
1922 def datetime_add_months(dt, months):
1923 """Increment/Decrement a datetime object by months."""
1924 month = dt.month + months - 1
1925 year = dt.year + month // 12
1926 month = month % 12 + 1
1927 day = min(dt.day, calendar.monthrange(year, month)[1])
1928 return dt.replace(year, month, day)
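# Illustrative example (doctest-style sketch): the day is clamped when the
# target month is shorter:
#   >>> datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   datetime.datetime(2020, 2, 29, 0, 0)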
1929
1930
1931 def datetime_round(dt, precision='day'):
1932 """
1933 Round a datetime object's time to a specific precision
1934 """
1935 if precision == 'microsecond':
1936 return dt
1937
1938 unit_seconds = {
1939 'day': 86400,
1940 'hour': 3600,
1941 'minute': 60,
1942 'second': 1,
1943 }
1944 roundto = lambda x, n: ((x + n / 2) // n) * n
1945 timestamp = calendar.timegm(dt.timetuple())
1946 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1947
1948
1949 def hyphenate_date(date_str):
1950 """
1951 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1952 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1953 if match is not None:
1954 return '-'.join(match.groups())
1955 else:
1956 return date_str
1957
1958
1959 class DateRange:
1960 """Represents a time interval between two dates"""
1961
1962 def __init__(self, start=None, end=None):
1963 """start and end must be strings in the format accepted by date_from_str"""
1964 if start is not None:
1965 self.start = date_from_str(start, strict=True)
1966 else:
1967 self.start = datetime.datetime.min.date()
1968 if end is not None:
1969 self.end = date_from_str(end, strict=True)
1970 else:
1971 self.end = datetime.datetime.max.date()
1972 if self.start > self.end:
1973 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
1974
1975 @classmethod
1976 def day(cls, day):
1977 """Returns a range that only contains the given day"""
1978 return cls(day, day)
1979
1980 def __contains__(self, date):
1981 """Check if the date is in the range"""
1982 if not isinstance(date, datetime.date):
1983 date = date_from_str(date)
1984 return self.start <= date <= self.end
1985
1986 def __str__(self):
1987 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1988
1989 def __eq__(self, other):
1990 return (isinstance(other, DateRange)
1991 and self.start == other.start and self.end == other.end)
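# Illustrative examples (doctest-style sketch):
#   >>> '20200115' in DateRange('20200101', '20200131')
#   True
#   >>> '20200201' in DateRange.day('20200131')
#   False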
1992
1993
1994 def platform_name():
1995 """ Returns the platform name as a str """
1996 deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
1997 return platform.platform()
1998
1999
2000 @functools.cache
2001 def system_identifier():
2002 python_implementation = platform.python_implementation()
2003 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2004 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
2005 libc_ver = []
2006 with contextlib.suppress(OSError): # We may not have access to the executable
2007 libc_ver = platform.libc_ver()
2008
2009 return 'Python %s (%s %s %s) - %s (%s%s)' % (
2010 platform.python_version(),
2011 python_implementation,
2012 platform.machine(),
2013 platform.architecture()[0],
2014 platform.platform(),
2015 ssl.OPENSSL_VERSION,
2016 format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
2017 )
2018
2019
2020 @functools.cache
2021 def get_windows_version():
2022 ''' Get the Windows version. Returns () if not running on Windows '''
2023 if compat_os_name == 'nt':
2024 return version_tuple(platform.win32_ver()[1])
2025 else:
2026 return ()
2027
2028
2029 def write_string(s, out=None, encoding=None):
2030 assert isinstance(s, str)
2031 out = out or sys.stderr
2032
2033 if compat_os_name == 'nt' and supports_terminal_sequences(out):
2034 s = re.sub(r'([\r\n]+)', r' \1', s)
2035
2036 enc, buffer = None, out
2037 if 'b' in getattr(out, 'mode', ''):
2038 enc = encoding or preferredencoding()
2039 elif hasattr(out, 'buffer'):
2040 buffer = out.buffer
2041 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
2042
2043 buffer.write(s.encode(enc, 'ignore') if enc else s)
2044 out.flush()
2045
2046
2047 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
2048 from . import _IN_CLI
2049 if _IN_CLI:
2050 if msg in deprecation_warning._cache:
2051 return
2052 deprecation_warning._cache.add(msg)
2053 if printer:
2054 return printer(f'{msg}{bug_reports_message()}', **kwargs)
2055 return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
2056 else:
2057 import warnings
2058 warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
2059
2060
2061 deprecation_warning._cache = set()
2062
2063
2064 def bytes_to_intlist(bs):
2065 if not bs:
2066 return []
2067 if isinstance(bs[0], int): # Python 3
2068 return list(bs)
2069 else:
2070 return [ord(c) for c in bs]
2071
2072
2073 def intlist_to_bytes(xs):
2074 if not xs:
2075 return b''
2076 return struct.pack('%dB' % len(xs), *xs)
2077
2078
2079 class LockingUnsupportedError(OSError):
2080 msg = 'File locking is not supported'
2081
2082 def __init__(self):
2083 super().__init__(self.msg)
2084
2085
2086 # Cross-platform file locking
2087 if sys.platform == 'win32':
2088 import ctypes
2089 import ctypes.wintypes
2090 import msvcrt
2091
2092 class OVERLAPPED(ctypes.Structure):
2093 _fields_ = [
2094 ('Internal', ctypes.wintypes.LPVOID),
2095 ('InternalHigh', ctypes.wintypes.LPVOID),
2096 ('Offset', ctypes.wintypes.DWORD),
2097 ('OffsetHigh', ctypes.wintypes.DWORD),
2098 ('hEvent', ctypes.wintypes.HANDLE),
2099 ]
2100
2101 kernel32 = ctypes.windll.kernel32
2102 LockFileEx = kernel32.LockFileEx
2103 LockFileEx.argtypes = [
2104 ctypes.wintypes.HANDLE, # hFile
2105 ctypes.wintypes.DWORD, # dwFlags
2106 ctypes.wintypes.DWORD, # dwReserved
2107 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2108 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2109 ctypes.POINTER(OVERLAPPED) # Overlapped
2110 ]
2111 LockFileEx.restype = ctypes.wintypes.BOOL
2112 UnlockFileEx = kernel32.UnlockFileEx
2113 UnlockFileEx.argtypes = [
2114 ctypes.wintypes.HANDLE, # hFile
2115 ctypes.wintypes.DWORD, # dwReserved
2116 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2117 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2118 ctypes.POINTER(OVERLAPPED) # Overlapped
2119 ]
2120 UnlockFileEx.restype = ctypes.wintypes.BOOL
2121 whole_low = 0xffffffff
2122 whole_high = 0x7fffffff
2123
2124 def _lock_file(f, exclusive, block):
2125 overlapped = OVERLAPPED()
2126 overlapped.Offset = 0
2127 overlapped.OffsetHigh = 0
2128 overlapped.hEvent = 0
2129 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2130
2131 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2132 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2133 0, whole_low, whole_high, f._lock_file_overlapped_p):
2134 # NB: The no-argument form of "ctypes.FormatError" does not work on PyPy
2135 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2136
2137 def _unlock_file(f):
2138 assert f._lock_file_overlapped_p
2139 handle = msvcrt.get_osfhandle(f.fileno())
2140 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2141 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2142
2143 else:
2144 try:
2145 import fcntl
2146
2147 def _lock_file(f, exclusive, block):
2148 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2149 if not block:
2150 flags |= fcntl.LOCK_NB
2151 try:
2152 fcntl.flock(f, flags)
2153 except BlockingIOError:
2154 raise
2155 except OSError: # AOSP does not have flock()
2156 fcntl.lockf(f, flags)
2157
2158 def _unlock_file(f):
2159 try:
2160 fcntl.flock(f, fcntl.LOCK_UN)
2161 except OSError:
2162 fcntl.lockf(f, fcntl.LOCK_UN)
2163
2164 except ImportError:
2165
2166 def _lock_file(f, exclusive, block):
2167 raise LockingUnsupportedError()
2168
2169 def _unlock_file(f):
2170 raise LockingUnsupportedError()
2171
2172
2173 class locked_file:
2174 locked = False
2175
2176 def __init__(self, filename, mode, block=True, encoding=None):
2177 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2178 raise NotImplementedError(mode)
2179 self.mode, self.block = mode, block
2180
2181 writable = any(f in mode for f in 'wax+')
2182 readable = any(f in mode for f in 'r+')
2183 flags = functools.reduce(operator.ior, (
2184 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2185 getattr(os, 'O_BINARY', 0), # Windows only
2186 getattr(os, 'O_NOINHERIT', 0), # Windows only
2187 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2188 os.O_APPEND if 'a' in mode else 0,
2189 os.O_EXCL if 'x' in mode else 0,
2190 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2191 ))
2192
2193 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2194
2195 def __enter__(self):
2196 exclusive = 'r' not in self.mode
2197 try:
2198 _lock_file(self.f, exclusive, self.block)
2199 self.locked = True
2200 except OSError:
2201 self.f.close()
2202 raise
2203 if 'w' in self.mode:
2204 try:
2205 self.f.truncate()
2206 except OSError as e:
2207 if e.errno not in (
2208 errno.ESPIPE, # Illegal seek - expected for FIFO
2209 errno.EINVAL, # Invalid argument - expected for /dev/null
2210 ):
2211 raise
2212 return self
2213
2214 def unlock(self):
2215 if not self.locked:
2216 return
2217 try:
2218 _unlock_file(self.f)
2219 finally:
2220 self.locked = False
2221
2222 def __exit__(self, *_):
2223 try:
2224 self.unlock()
2225 finally:
2226 self.f.close()
2227
2228 open = __enter__
2229 close = __exit__
2230
2231 def __getattr__(self, attr):
2232 return getattr(self.f, attr)
2233
2234 def __iter__(self):
2235 return iter(self.f)
2236
2237
2238 @functools.cache
2239 def get_filesystem_encoding():
2240 encoding = sys.getfilesystemencoding()
2241 return encoding if encoding is not None else 'utf-8'
2242
2243
2244 def shell_quote(args):
2245 quoted_args = []
2246 encoding = get_filesystem_encoding()
2247 for a in args:
2248 if isinstance(a, bytes):
2249 # We may get a filename encoded with 'encodeFilename'
2250 a = a.decode(encoding)
2251 quoted_args.append(compat_shlex_quote(a))
2252 return ' '.join(quoted_args)
2253
2254
2255 def smuggle_url(url, data):
2256 """ Pass additional data in a URL for internal use. """
2257
2258 url, idata = unsmuggle_url(url, {})
2259 data.update(idata)
2260 sdata = urllib.parse.urlencode(
2261 {'__youtubedl_smuggle': json.dumps(data)})
2262 return url + '#' + sdata
2263
2264
2265 def unsmuggle_url(smug_url, default=None):
2266 if '#__youtubedl_smuggle' not in smug_url:
2267 return smug_url, default
2268 url, _, sdata = smug_url.rpartition('#')
2269 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2270 data = json.loads(jsond)
2271 return url, data
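# Illustrative round trip (doctest-style sketch; hypothetical URL):
#   >>> url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
#   >>> unsmuggle_url(url)
#   ('https://example.com/video', {'referer': 'https://example.com/'})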
2272
2273
2274 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2275 """ Formats numbers with decimal suffixes like K, M, etc """
2276 num, factor = float_or_none(num), float(factor)
2277 if num is None or num < 0:
2278 return None
2279 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2280 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2281 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2282 if factor == 1024:
2283 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2284 converted = num / (factor ** exponent)
2285 return fmt % (converted, suffix)
2286
2287
2288 def format_bytes(bytes):
2289 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
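# Illustrative examples (doctest-style sketch):
#   >>> format_decimal_suffix(1500, '%.1f%s')
#   '1.5k'
#   >>> format_bytes(1024 ** 2)
#   '1.00MiB'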
2290
2291
2292 def lookup_unit_table(unit_table, s):
2293 units_re = '|'.join(re.escape(u) for u in unit_table)
2294 m = re.match(
2295 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2296 if not m:
2297 return None
2298 num_str = m.group('num').replace(',', '.')
2299 mult = unit_table[m.group('unit')]
2300 return int(float(num_str) * mult)
2301
2302
2303 def parse_filesize(s):
2304 if s is None:
2305 return None
2306
2307 # The lower-case forms are of course incorrect and unofficial,
2308 # but we support those too
2309 _UNIT_TABLE = {
2310 'B': 1,
2311 'b': 1,
2312 'bytes': 1,
2313 'KiB': 1024,
2314 'KB': 1000,
2315 'kB': 1024,
2316 'Kb': 1000,
2317 'kb': 1000,
2318 'kilobytes': 1000,
2319 'kibibytes': 1024,
2320 'MiB': 1024 ** 2,
2321 'MB': 1000 ** 2,
2322 'mB': 1024 ** 2,
2323 'Mb': 1000 ** 2,
2324 'mb': 1000 ** 2,
2325 'megabytes': 1000 ** 2,
2326 'mebibytes': 1024 ** 2,
2327 'GiB': 1024 ** 3,
2328 'GB': 1000 ** 3,
2329 'gB': 1024 ** 3,
2330 'Gb': 1000 ** 3,
2331 'gb': 1000 ** 3,
2332 'gigabytes': 1000 ** 3,
2333 'gibibytes': 1024 ** 3,
2334 'TiB': 1024 ** 4,
2335 'TB': 1000 ** 4,
2336 'tB': 1024 ** 4,
2337 'Tb': 1000 ** 4,
2338 'tb': 1000 ** 4,
2339 'terabytes': 1000 ** 4,
2340 'tebibytes': 1024 ** 4,
2341 'PiB': 1024 ** 5,
2342 'PB': 1000 ** 5,
2343 'pB': 1024 ** 5,
2344 'Pb': 1000 ** 5,
2345 'pb': 1000 ** 5,
2346 'petabytes': 1000 ** 5,
2347 'pebibytes': 1024 ** 5,
2348 'EiB': 1024 ** 6,
2349 'EB': 1000 ** 6,
2350 'eB': 1024 ** 6,
2351 'Eb': 1000 ** 6,
2352 'eb': 1000 ** 6,
2353 'exabytes': 1000 ** 6,
2354 'exbibytes': 1024 ** 6,
2355 'ZiB': 1024 ** 7,
2356 'ZB': 1000 ** 7,
2357 'zB': 1024 ** 7,
2358 'Zb': 1000 ** 7,
2359 'zb': 1000 ** 7,
2360 'zettabytes': 1000 ** 7,
2361 'zebibytes': 1024 ** 7,
2362 'YiB': 1024 ** 8,
2363 'YB': 1000 ** 8,
2364 'yB': 1024 ** 8,
2365 'Yb': 1000 ** 8,
2366 'yb': 1000 ** 8,
2367 'yottabytes': 1000 ** 8,
2368 'yobibytes': 1024 ** 8,
2369 }
2370
2371 return lookup_unit_table(_UNIT_TABLE, s)
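# Illustrative examples (doctest-style sketch; note the SI vs binary units):
#   >>> parse_filesize('1.5 GB')
#   1500000000
#   >>> parse_filesize('500 KiB')
#   512000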
2372
2373
2374 def parse_count(s):
2375 if s is None:
2376 return None
2377
2378 s = re.sub(r'^[^\d]+\s', '', s).strip()
2379
2380 if re.match(r'^[\d,.]+$', s):
2381 return str_to_int(s)
2382
2383 _UNIT_TABLE = {
2384 'k': 1000,
2385 'K': 1000,
2386 'm': 1000 ** 2,
2387 'M': 1000 ** 2,
2388 'kk': 1000 ** 2,
2389 'KK': 1000 ** 2,
2390 'b': 1000 ** 3,
2391 'B': 1000 ** 3,
2392 }
2393
2394 ret = lookup_unit_table(_UNIT_TABLE, s)
2395 if ret is not None:
2396 return ret
2397
2398 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2399 if mobj:
2400 return str_to_int(mobj.group(1))
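# Illustrative examples (doctest-style sketch):
#   >>> parse_count('1.2M')
#   1200000
#   >>> parse_count('1,000 views')
#   1000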
2401
2402
2403 def parse_resolution(s, *, lenient=False):
2404 if s is None:
2405 return {}
2406
2407 if lenient:
2408 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2409 else:
2410 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2411 if mobj:
2412 return {
2413 'width': int(mobj.group('w')),
2414 'height': int(mobj.group('h')),
2415 }
2416
2417 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2418 if mobj:
2419 return {'height': int(mobj.group(1))}
2420
2421 mobj = re.search(r'\b([48])[kK]\b', s)
2422 if mobj:
2423 return {'height': int(mobj.group(1)) * 540}
2424
2425 return {}
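# Illustrative examples (doctest-style sketch):
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}
#   >>> parse_resolution('4K')
#   {'height': 2160}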
2426
2427
2428 def parse_bitrate(s):
2429 if not isinstance(s, str):
2430 return
2431 mobj = re.search(r'\b(\d+)\s*kbps', s)
2432 if mobj:
2433 return int(mobj.group(1))
2434
2435
2436 def month_by_name(name, lang='en'):
2437 """ Return the number of a month given its locale-independent English name """
2438
2439 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2440
2441 try:
2442 return month_names.index(name) + 1
2443 except ValueError:
2444 return None
2445
2446
2447 def month_by_abbreviation(abbrev):
2448 """ Return the number of a month given its locale-independent English
2449 abbreviation """
2450
2451 try:
2452 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2453 except ValueError:
2454 return None
2455
2456
2457 def fix_xml_ampersands(xml_str):
2458 """Replace all unescaped '&' with '&amp;' in XML"""
2459 return re.sub(
2460 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2461 '&amp;',
2462 xml_str)
2463
2464
2465 def setproctitle(title):
2466 assert isinstance(title, str)
2467
2468 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2469 try:
2470 import ctypes
2471 except ImportError:
2472 return
2473
2474 try:
2475 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2476 except OSError:
2477 return
2478 except TypeError:
2479 # LoadLibrary in Windows Python 2.7.13 only expects
2480 # a bytestring, but since unicode_literals turns
2481 # every string into a unicode string, it fails.
2482 return
2483 title_bytes = title.encode()
2484 buf = ctypes.create_string_buffer(len(title_bytes))
2485 buf.value = title_bytes
2486 try:
2487 libc.prctl(15, buf, 0, 0, 0)
2488 except AttributeError:
2489 return # Strange libc, just skip this
2490
2491
2492 def remove_start(s, start):
2493 return s[len(start):] if s is not None and s.startswith(start) else s
2494
2495
2496 def remove_end(s, end):
2497 return s[:-len(end)] if s is not None and s.endswith(end) else s
2498
2499
2500 def remove_quotes(s):
2501 if s is None or len(s) < 2:
2502 return s
2503 for quote in ('"', "'", ):
2504 if s[0] == quote and s[-1] == quote:
2505 return s[1:-1]
2506 return s
2507
2508
2509 def get_domain(url):
2510 """
2511 This implementation is inconsistent, but is kept for compatibility.
2512 Use this only for "webpage_url_domain"
2513 """
2514 return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2515
2516
2517 def url_basename(url):
2518 path = urllib.parse.urlparse(url).path
2519 return path.strip('/').split('/')[-1]
2520
2521
2522 def base_url(url):
2523 return re.match(r'https?://[^?#]+/', url).group()
2524
2525
2526 def urljoin(base, path):
2527 if isinstance(path, bytes):
2528 path = path.decode()
2529 if not isinstance(path, str) or not path:
2530 return None
2531 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2532 return path
2533 if isinstance(base, bytes):
2534 base = base.decode()
2535 if not isinstance(base, str) or not re.match(
2536 r'^(?:https?:)?//', base):
2537 return None
2538 return urllib.parse.urljoin(base, path)
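# Illustrative examples (doctest-style sketch; hypothetical URLs):
#   >>> urljoin('https://example.com/a/', 'b/c')
#   'https://example.com/a/b/c'
#   >>> urljoin('https://example.com/a/', '//cdn.example.com/x')  # already absolute
#   '//cdn.example.com/x'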
2539
2540
2541 class HEADRequest(urllib.request.Request):
2542 def get_method(self):
2543 return 'HEAD'
2544
2545
2546 class PUTRequest(urllib.request.Request):
2547 def get_method(self):
2548 return 'PUT'
2549
2550
2551 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2552 if get_attr and v is not None:
2553 v = getattr(v, get_attr, None)
2554 try:
2555 return int(v) * invscale // scale
2556 except (ValueError, TypeError, OverflowError):
2557 return default
2558
2559
2560 def str_or_none(v, default=None):
2561 return default if v is None else str(v)
2562
2563
2564 def str_to_int(int_str):
2565 """ A more relaxed version of int_or_none """
2566 if isinstance(int_str, int):
2567 return int_str
2568 elif isinstance(int_str, str):
2569 int_str = re.sub(r'[,\.\+]', '', int_str)
2570 return int_or_none(int_str)
2571
2572
2573 def float_or_none(v, scale=1, invscale=1, default=None):
2574 if v is None:
2575 return default
2576 try:
2577 return float(v) * invscale / scale
2578 except (ValueError, TypeError):
2579 return default
2580
2581
2582 def bool_or_none(v, default=None):
2583 return v if isinstance(v, bool) else default
2584
2585
2586 def strip_or_none(v, default=None):
2587 return v.strip() if isinstance(v, str) else default
2588
2589
2590 def url_or_none(url):
2591 if not url or not isinstance(url, str):
2592 return None
2593 url = url.strip()
2594 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2595
2596
2597 def request_to_url(req):
2598 if isinstance(req, urllib.request.Request):
2599 return req.get_full_url()
2600 else:
2601 return req
2602
2603
2604 def strftime_or_none(timestamp, date_format, default=None):
2605 datetime_object = None
2606 try:
2607 if isinstance(timestamp, (int, float)): # unix timestamp
2608 # Using naive datetime here can break timestamp() in Windows
2609 # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
2610 datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
2611 elif isinstance(timestamp, str): # assume YYYYMMDD
2612 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2613 date_format = re.sub( # Support %s on windows
2614 r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
2615 return datetime_object.strftime(date_format)
2616 except (ValueError, TypeError, AttributeError):
2617 return default
2618
2619
2620 def parse_duration(s):
2621 if not isinstance(s, str):
2622 return None
2623 s = s.strip()
2624 if not s:
2625 return None
2626
2627 days, hours, mins, secs, ms = [None] * 5
2628 m = re.match(r'''(?x)
2629 (?P<before_secs>
2630 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2631 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2632 (?P<ms>[.:][0-9]+)?Z?$
2633 ''', s)
2634 if m:
2635 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2636 else:
2637 m = re.match(
2638 r'''(?ix)(?:P?
2639 (?:
2640 [0-9]+\s*y(?:ears?)?,?\s*
2641 )?
2642 (?:
2643 [0-9]+\s*m(?:onths?)?,?\s*
2644 )?
2645 (?:
2646 [0-9]+\s*w(?:eeks?)?,?\s*
2647 )?
2648 (?:
2649 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2650 )?
2651 T)?
2652 (?:
2653 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2654 )?
2655 (?:
2656 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2657 )?
2658 (?:
2659 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2660 )?Z?$''', s)
2661 if m:
2662 days, hours, mins, secs, ms = m.groups()
2663 else:
2664 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2665 if m:
2666 hours, mins = m.groups()
2667 else:
2668 return None
2669
2670 if ms:
2671 ms = ms.replace(':', '.')
2672 return sum(float(part or 0) * mult for part, mult in (
2673 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
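# Illustrative examples (doctest-style sketch):
#   >>> parse_duration('1:23:45')
#   5025.0
#   >>> parse_duration('PT1H30M')
#   5400.0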
2674
2675
2676 def prepend_extension(filename, ext, expected_real_ext=None):
2677 name, real_ext = os.path.splitext(filename)
2678 return (
2679 f'{name}.{ext}{real_ext}'
2680 if not expected_real_ext or real_ext[1:] == expected_real_ext
2681 else f'{filename}.{ext}')
2682
2683
2684 def replace_extension(filename, ext, expected_real_ext=None):
2685 name, real_ext = os.path.splitext(filename)
2686 return '{}.{}'.format(
2687 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2688 ext)
2689
2690
2691 def check_executable(exe, args=[]):
2692 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2693 args can be a list of arguments for a short output (like -version) """
2694 try:
2695 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2696 except OSError:
2697 return False
2698 return exe
2699
2700
2701 def _get_exe_version_output(exe, args):
2702 try:
2703 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2704 # SIGTTOU if yt-dlp is run in the background.
2705 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2706 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2707 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2708 except OSError:
2709 return False
2710 return stdout
2711
2712
2713 def detect_exe_version(output, version_re=None, unrecognized='present'):
2714 assert isinstance(output, str)
2715 if version_re is None:
2716 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2717 m = re.search(version_re, output)
2718 if m:
2719 return m.group(1)
2720 else:
2721 return unrecognized
2722
2723
2724 def get_exe_version(exe, args=['--version'],
2725 version_re=None, unrecognized='present'):
2726 """ Returns the version of the specified executable,
2727 or False if the executable is not present """
2728 out = _get_exe_version_output(exe, args)
2729 return detect_exe_version(out, version_re, unrecognized) if out else False
2730
2731
2732 def frange(start=0, stop=None, step=1):
2733 """Float range"""
2734 if stop is None:
2735 start, stop = 0, start
2736 sign = [-1, 1][step > 0] if step else 0
2737 while sign * start < sign * stop:
2738 yield start
2739 start += step
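# Illustrative examples (doctest-style sketch):
#   >>> list(frange(3))
#   [0, 1, 2]
#   >>> list(frange(0, 1, 0.25))
#   [0, 0.25, 0.5, 0.75]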
2740
2741
2742 class LazyList(collections.abc.Sequence):
2743 """Lazy immutable list from an iterable
2744 Note that slices of a LazyList are lists and not LazyLists"""
2745
2746 class IndexError(IndexError):
2747 pass
2748
2749 def __init__(self, iterable, *, reverse=False, _cache=None):
2750 self._iterable = iter(iterable)
2751 self._cache = [] if _cache is None else _cache
2752 self._reversed = reverse
2753
2754 def __iter__(self):
2755 if self._reversed:
2756 # We need to consume the entire iterable to iterate in reverse
2757 yield from self.exhaust()
2758 return
2759 yield from self._cache
2760 for item in self._iterable:
2761 self._cache.append(item)
2762 yield item
2763
2764 def _exhaust(self):
2765 self._cache.extend(self._iterable)
2766 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2767 return self._cache
2768
2769 def exhaust(self):
2770 """Evaluate the entire iterable"""
2771 return self._exhaust()[::-1 if self._reversed else 1]
2772
2773 @staticmethod
2774 def _reverse_index(x):
2775 return None if x is None else ~x
2776
2777 def __getitem__(self, idx):
2778 if isinstance(idx, slice):
2779 if self._reversed:
2780 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2781 start, stop, step = idx.start, idx.stop, idx.step or 1
2782 elif isinstance(idx, int):
2783 if self._reversed:
2784 idx = self._reverse_index(idx)
2785 start, stop, step = idx, idx, 0
2786 else:
2787 raise TypeError('indices must be integers or slices')
2788 if ((start or 0) < 0 or (stop or 0) < 0
2789 or (start is None and step < 0)
2790 or (stop is None and step > 0)):
2791 # We need to consume the entire iterable to be able to slice from the end
2792 # Obviously, never use this with infinite iterables
2793 self._exhaust()
2794 try:
2795 return self._cache[idx]
2796 except IndexError as e:
2797 raise self.IndexError(e) from e
2798 n = max(start or 0, stop or 0) - len(self._cache) + 1
2799 if n > 0:
2800 self._cache.extend(itertools.islice(self._iterable, n))
2801 try:
2802 return self._cache[idx]
2803 except IndexError as e:
2804 raise self.IndexError(e) from e
2805
2806 def __bool__(self):
2807 try:
2808 self[-1] if self._reversed else self[0]
2809 except self.IndexError:
2810 return False
2811 return True
2812
2813 def __len__(self):
2814 self._exhaust()
2815 return len(self._cache)
2816
2817 def __reversed__(self):
2818 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2819
2820 def __copy__(self):
2821 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2822
2823 def __repr__(self):
2824 # repr and str should mimic a list. So we exhaust the iterable
2825 return repr(self.exhaust())
2826
2827 def __str__(self):
2828 return repr(self.exhaust())
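# Illustrative example (doctest-style sketch): slicing only consumes as much
# of the underlying iterable as needed, so even infinite iterators are safe:
#   >>> lst = LazyList(itertools.count())
#   >>> lst[:5]
#   [0, 1, 2, 3, 4]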
2829
2830
2831 class PagedList:
2832
2833 class IndexError(IndexError):
2834 pass
2835
2836 def __len__(self):
2837 # This is only useful for tests
2838 return len(self.getslice())
2839
2840 def __init__(self, pagefunc, pagesize, use_cache=True):
2841 self._pagefunc = pagefunc
2842 self._pagesize = pagesize
2843 self._pagecount = float('inf')
2844 self._use_cache = use_cache
2845 self._cache = {}
2846
2847 def getpage(self, pagenum):
2848 page_results = self._cache.get(pagenum)
2849 if page_results is None:
2850 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2851 if self._use_cache:
2852 self._cache[pagenum] = page_results
2853 return page_results
2854
2855 def getslice(self, start=0, end=None):
2856 return list(self._getslice(start, end))
2857
2858 def _getslice(self, start, end):
2859 raise NotImplementedError('This method must be implemented by subclasses')
2860
2861 def __getitem__(self, idx):
2862 assert self._use_cache, 'Indexing PagedList requires cache'
2863 if not isinstance(idx, int) or idx < 0:
2864 raise TypeError('indices must be non-negative integers')
2865 entries = self.getslice(idx, idx + 1)
2866 if not entries:
2867 raise self.IndexError()
2868 return entries[0]
2869
2870
2871 class OnDemandPagedList(PagedList):
2872 """Download pages until a page with fewer than the maximum number of results"""
2873
2874 def _getslice(self, start, end):
2875 for pagenum in itertools.count(start // self._pagesize):
2876 firstid = pagenum * self._pagesize
2877 nextfirstid = pagenum * self._pagesize + self._pagesize
2878 if start >= nextfirstid:
2879 continue
2880
2881 startv = (
2882 start % self._pagesize
2883 if firstid <= start < nextfirstid
2884 else 0)
2885 endv = (
2886 ((end - 1) % self._pagesize) + 1
2887 if (end is not None and firstid <= end <= nextfirstid)
2888 else None)
2889
2890 try:
2891 page_results = self.getpage(pagenum)
2892 except Exception:
2893 self._pagecount = pagenum - 1
2894 raise
2895 if startv != 0 or endv is not None:
2896 page_results = page_results[startv:endv]
2897 yield from page_results
2898
2899 # A little optimization: if the current page is not "full", i.e. does
2900 # not contain page_size videos, then we can assume that this page
2901 # is the last one - there are no more ids on further pages -
2902 # so there is no need to query again.
2903 if len(page_results) + startv < self._pagesize:
2904 break
2905
2906 # If we got the whole page, but the next page is not interesting,
2907 # break out early as well
2908 if end == nextfirstid:
2909 break
2910
2911
2912 class InAdvancePagedList(PagedList):
2913 """PagedList with total number of pages known in advance"""
2914
2915 def __init__(self, pagefunc, pagecount, pagesize):
2916 PagedList.__init__(self, pagefunc, pagesize, True)
2917 self._pagecount = pagecount
2918
2919 def _getslice(self, start, end):
2920 start_page = start // self._pagesize
2921 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2922 skip_elems = start - start_page * self._pagesize
2923 only_more = None if end is None else end - start
2924 for pagenum in range(start_page, end_page):
2925 page_results = self.getpage(pagenum)
2926 if skip_elems:
2927 page_results = page_results[skip_elems:]
2928 skip_elems = None
2929 if only_more is not None:
2930 if len(page_results) < only_more:
2931 only_more -= len(page_results)
2932 else:
2933 yield from page_results[:only_more]
2934 break
2935 yield from page_results
2936
2937
2938 class PlaylistEntries:
2939 MissingEntry = object()
2940 is_exhausted = False
2941
2942 def __init__(self, ydl, info_dict):
2943 self.ydl = ydl
2944
2945 # _entries must be assigned now since infodict can change during iteration
2946 entries = info_dict.get('entries')
2947 if entries is None:
2948 raise EntryNotInPlaylist('There are no entries')
2949 elif isinstance(entries, list):
2950 self.is_exhausted = True
2951
2952 requested_entries = info_dict.get('requested_entries')
2953 self.is_incomplete = requested_entries is not None
2954 if self.is_incomplete:
2955 assert self.is_exhausted
2956 self._entries = [self.MissingEntry] * max(requested_entries or [0])
2957 for i, entry in zip(requested_entries, entries):
2958 self._entries[i - 1] = entry
2959 elif isinstance(entries, (list, PagedList, LazyList)):
2960 self._entries = entries
2961 else:
2962 self._entries = LazyList(entries)
2963
2964 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2965 (?P<start>[+-]?\d+)?
2966 (?P<range>[:-]
2967 (?P<end>[+-]?\d+|inf(?:inite)?)?
2968 (?::(?P<step>[+-]?\d+))?
2969 )?''')
2970
2971 @classmethod
2972 def parse_playlist_items(cls, string):
2973 for segment in string.split(','):
2974 if not segment:
2975 raise ValueError('There are two or more consecutive commas')
2976 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2977 if not mobj:
2978 raise ValueError(f'{segment!r} is not a valid specification')
2979 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2980 if int_or_none(step) == 0:
2981 raise ValueError(f'Step in {segment!r} cannot be zero')
2982 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2983
2984 def get_requested_items(self):
2985 playlist_items = self.ydl.params.get('playlist_items')
2986 playlist_start = self.ydl.params.get('playliststart', 1)
2987 playlist_end = self.ydl.params.get('playlistend')
2988 # For backwards compatibility, interpret -1 as whole list
2989 if playlist_end in (-1, None):
2990 playlist_end = ''
2991 if not playlist_items:
2992 playlist_items = f'{playlist_start}:{playlist_end}'
2993 elif playlist_start != 1 or playlist_end:
2994 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2995
2996 for index in self.parse_playlist_items(playlist_items):
2997 for i, entry in self[index]:
2998 yield i, entry
2999 if not entry:
3000 continue
3001 try:
3002 # TODO: Add auto-generated fields
3003 self.ydl._match_entry(entry, incomplete=True, silent=True)
3004 except (ExistingVideoReached, RejectedVideoReached):
3005 return
3006
3007 def get_full_count(self):
3008 if self.is_exhausted and not self.is_incomplete:
3009 return len(self)
3010 elif isinstance(self._entries, InAdvancePagedList):
3011 if self._entries._pagesize == 1:
3012 return self._entries._pagecount
3013
3014 @functools.cached_property
3015 def _getter(self):
3016 if isinstance(self._entries, list):
3017 def get_entry(i):
3018 try:
3019 entry = self._entries[i]
3020 except IndexError:
3021 entry = self.MissingEntry
3022 if not self.is_incomplete:
3023 raise self.IndexError()
3024 if entry is self.MissingEntry:
3025 raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
3026 return entry
3027 else:
3028 def get_entry(i):
3029 try:
3030 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
3031 except (LazyList.IndexError, PagedList.IndexError):
3032 raise self.IndexError()
3033 return get_entry
3034
3035 def __getitem__(self, idx):
3036 if isinstance(idx, int):
3037 idx = slice(idx, idx)
3038
3039 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3040 step = 1 if idx.step is None else idx.step
3041 if idx.start is None:
3042 start = 0 if step > 0 else len(self) - 1
3043 else:
3044 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
3045
3046 # NB: Do not call len(self) when idx == [:]
3047 if idx.stop is None:
3048 stop = 0 if step < 0 else float('inf')
3049 else:
3050 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
3051 stop += [-1, 1][step > 0]
3052
3053 for i in frange(start, stop, step):
3054 if i < 0:
3055 continue
3056 try:
3057 entry = self._getter(i)
3058 except self.IndexError:
3059 self.is_exhausted = True
3060 if step > 0:
3061 break
3062 continue
3063 yield i + 1, entry
3064
3065 def __len__(self):
3066 return len(tuple(self[:]))
3067
3068 class IndexError(IndexError):
3069 pass
3070
3071
3072 def uppercase_escape(s):
3073 unicode_escape = codecs.getdecoder('unicode_escape')
3074 return re.sub(
3075 r'\\U[0-9a-fA-F]{8}',
3076 lambda m: unicode_escape(m.group(0))[0],
3077 s)
3078
3079
3080 def lowercase_escape(s):
3081 unicode_escape = codecs.getdecoder('unicode_escape')
3082 return re.sub(
3083 r'\\u[0-9a-fA-F]{4}',
3084 lambda m: unicode_escape(m.group(0))[0],
3085 s)
3086
3087
3088 def escape_rfc3986(s):
3089 """Escape non-ASCII characters as suggested by RFC 3986"""
3090 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3091
3092
3093 def escape_url(url):
3094 """Escape URL as suggested by RFC 3986"""
3095 url_parsed = urllib.parse.urlparse(url)
3096 return url_parsed._replace(
3097 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3098 path=escape_rfc3986(url_parsed.path),
3099 params=escape_rfc3986(url_parsed.params),
3100 query=escape_rfc3986(url_parsed.query),
3101 fragment=escape_rfc3986(url_parsed.fragment)
3102 ).geturl()
3103
3104
3105 def parse_qs(url, **kwargs):
3106 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
3107
3108
3109 def read_batch_urls(batch_fd):
3110 def fixup(url):
3111 if not isinstance(url, str):
3112 url = url.decode('utf-8', 'replace')
3113 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3114 for bom in BOM_UTF8:
3115 if url.startswith(bom):
3116 url = url[len(bom):]
3117 url = url.lstrip()
3118 if not url or url.startswith(('#', ';', ']')):
3119 return False
3120 # "#" cannot be stripped out since it is part of the URI
3121 # However, it can be safely stripped out if it follows a whitespace
3122 return re.split(r'\s#', url, 1)[0].rstrip()
3123
3124 with contextlib.closing(batch_fd) as fd:
3125 return [url for url in map(fixup, fd) if url]
3126
3127
3128 def urlencode_postdata(*args, **kargs):
3129 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3130
3131
3132 def update_url_query(url, query):
3133 if not query:
3134 return url
3135 parsed_url = urllib.parse.urlparse(url)
3136 qs = urllib.parse.parse_qs(parsed_url.query)
3137 qs.update(query)
3138 return urllib.parse.urlunparse(parsed_url._replace(
3139 query=urllib.parse.urlencode(qs, True)))
3140
3141
3142 def update_Request(req, url=None, data=None, headers=None, query=None):
3143 req_headers = req.headers.copy()
3144 req_headers.update(headers or {})
3145 req_data = data or req.data
3146 req_url = update_url_query(url or req.get_full_url(), query)
3147 req_get_method = req.get_method()
3148 if req_get_method == 'HEAD':
3149 req_type = HEADRequest
3150 elif req_get_method == 'PUT':
3151 req_type = PUTRequest
3152 else:
3153 req_type = urllib.request.Request
3154 new_req = req_type(
3155 req_url, data=req_data, headers=req_headers,
3156 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3157 if hasattr(req, 'timeout'):
3158 new_req.timeout = req.timeout
3159 return new_req
3160
3161
3162 def _multipart_encode_impl(data, boundary):
3163 content_type = 'multipart/form-data; boundary=%s' % boundary
3164
3165 out = b''
3166 for k, v in data.items():
3167 out += b'--' + boundary.encode('ascii') + b'\r\n'
3168 if isinstance(k, str):
3169 k = k.encode()
3170 if isinstance(v, str):
3171 v = v.encode()
3172 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3173 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3174 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3175 if boundary.encode('ascii') in content:
3176 raise ValueError('Boundary overlaps with data')
3177 out += content
3178
3179 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3180
3181 return out, content_type
3182
3183
3184 def multipart_encode(data, boundary=None):
3185 '''
3186 Encode a dict to RFC 7578-compliant form-data
3187
3188 data:
3189 A dict where keys and values can be either Unicode or bytes-like
3190 objects.
3191 boundary:
3192 If specified, it must be a Unicode object and is used as the boundary.
3193 Otherwise, a random boundary is generated.
3194
3195 Reference: https://tools.ietf.org/html/rfc7578
3196 '''
3197 has_specified_boundary = boundary is not None
3198
3199 while True:
3200 if boundary is None:
3201 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3202
3203 try:
3204 out, content_type = _multipart_encode_impl(data, boundary)
3205 break
3206 except ValueError:
3207 if has_specified_boundary:
3208 raise
3209 boundary = None
3210
3211 return out, content_type
3212
3213
3214 def variadic(x, allowed_types=(str, bytes, dict)):
3215 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
3216
3217
3218 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3219 for val in map(d.get, variadic(key_or_keys)):
3220 if val is not None and (val or not skip_false_values):
3221 return val
3222 return default
3223
3224
3225 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3226 for f in funcs:
3227 try:
3228 val = f(*args, **kwargs)
3229 except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3230 pass
3231 else:
3232 if expected_type is None or isinstance(val, expected_type):
3233 return val
3234
3235
3236 def try_get(src, getter, expected_type=None):
3237 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
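# Illustrative examples (doctest-style sketch; hypothetical info dict):
#   >>> info = {'formats': [{'url': 'https://example.com/v.mp4'}]}
#   >>> try_get(info, lambda x: x['formats'][0]['url'], str)
#   'https://example.com/v.mp4'
#   >>> try_get(info, lambda x: x['thumbnails'][0]['url'], str) is None
#   True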
3238
3239
3240 def filter_dict(dct, cndn=lambda _, v: v is not None):
3241 return {k: v for k, v in dct.items() if cndn(k, v)}
3242
3243
3244 def merge_dicts(*dicts):
3245 merged = {}
3246 for a_dict in dicts:
3247 for k, v in a_dict.items():
3248 if (v is not None and k not in merged
3249 or isinstance(v, str) and merged[k] == ''):
3250 merged[k] = v
3251 return merged
3252
3253
3254 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3255 return string if isinstance(string, str) else str(string, encoding, errors)
3256
3257
3258 US_RATINGS = {
3259 'G': 0,
3260 'PG': 10,
3261 'PG-13': 13,
3262 'R': 16,
3263 'NC': 18,
3264 }
3265
3266
3267 TV_PARENTAL_GUIDELINES = {
3268 'TV-Y': 0,
3269 'TV-Y7': 7,
3270 'TV-G': 0,
3271 'TV-PG': 0,
3272 'TV-14': 14,
3273 'TV-MA': 17,
3274 }
3275
3276
3277 def parse_age_limit(s):
3278 # isinstance(False, int) is True. So type() must be used instead
3279 if type(s) is int: # noqa: E721
3280 return s if 0 <= s <= 21 else None
3281 elif not isinstance(s, str):
3282 return None
3283 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3284 if m:
3285 return int(m.group('age'))
3286 s = s.upper()
3287 if s in US_RATINGS:
3288 return US_RATINGS[s]
3289 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3290 if m:
3291 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3292 return None
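# Illustrative examples (doctest-style sketch):
#   >>> parse_age_limit('PG-13')
#   13
#   >>> parse_age_limit('TV-MA')
#   17
#   >>> parse_age_limit('18+')
#   18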
3293
3294
3295 def strip_jsonp(code):
3296 return re.sub(
3297 r'''(?sx)^
3298 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3299 (?:\s*&&\s*(?P=func_name))?
3300 \s*\(\s*(?P<callback_data>.*)\);?
3301 \s*?(?://[^\n]*)*$''',
3302 r'\g<callback_data>', code)
3303
3304
3305 def js_to_json(code, vars={}, *, strict=False):
3306 # vars is a dict of var, val pairs to substitute
3307 STRING_QUOTES = '\'"'
3308 STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
3309 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3310 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3311 INTEGER_TABLE = (
3312 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3313 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3314 )
3315
3316 def process_escape(match):
3317 JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
3318 escape = match.group(1) or match.group(2)
3319
3320 return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
3321 else R'\u00' if escape == 'x'
3322 else '' if escape == '\n'
3323 else escape)
3324
3325 def fix_kv(m):
3326 v = m.group(0)
3327 if v in ('true', 'false', 'null'):
3328 return v
3329 elif v in ('undefined', 'void 0'):
3330 return 'null'
3331 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3332 return ''
3333
3334 if v[0] in STRING_QUOTES:
3335 escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
3336 return f'"{escaped}"'
3337
3338 for regex, base in INTEGER_TABLE:
3339 im = re.match(regex, v)
3340 if im:
3341 i = int(im.group(1), base)
3342 return f'"{i}":' if v.endswith(':') else str(i)
3343
3344 if v in vars:
3345 return json.dumps(vars[v])
3346
3347 if not strict:
3348 return f'"{v}"'
3349
3350 raise ValueError(f'Unknown value: {v}')
3351
3352 def create_map(mobj):
3353 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3354
3355 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3356 if not strict:
3357 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3358 code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
3359
3360 return re.sub(rf'''(?sx)
3361 {STRING_RE}|
3362 {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
3363 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3364 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
3365 [0-9]+(?={SKIP_RE}:)|
3366 !+
3367 ''', fix_kv, code)
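# Illustrative example (doctest-style sketch; hand-traced output):
#   >>> js_to_json("{abc: 'def', ghi: 0x1A, jkl: undefined,}")
#   '{"abc": "def", "ghi": 26, "jkl": null}'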
3368
3369
3370 def qualities(quality_ids):
3371 """ Get a numeric quality value out of a list of possible values """
3372 def q(qid):
3373 try:
3374 return quality_ids.index(qid)
3375 except ValueError:
3376 return -1
3377 return q
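# Illustrative example (doctest-style sketch):
#   >>> q = qualities(['240p', '480p', '1080p'])
#   >>> q('480p'), q('4320p')  # unknown qualities sort below all known ones
#   (1, -1)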
3378
3379
3380 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3381
3382
3383 DEFAULT_OUTTMPL = {
3384 'default': '%(title)s [%(id)s].%(ext)s',
3385 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3386 }
3387 OUTTMPL_TYPES = {
3388 'chapter': None,
3389 'subtitle': None,
3390 'thumbnail': None,
3391 'description': 'description',
3392 'annotation': 'annotations.xml',
3393 'infojson': 'info.json',
3394 'link': None,
3395 'pl_video': None,
3396 'pl_thumbnail': None,
3397 'pl_description': 'description',
3398 'pl_infojson': 'info.json',
3399 }
3400
3401 # As of [1], the format syntax is:
3402 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3403 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3404 STR_FORMAT_RE_TMPL = r'''(?x)
3405 (?<!%)(?P<prefix>(?:%%)*)
3406 %
3407 (?P<has_key>\((?P<key>{0})\))?
3408 (?P<format>
3409 (?P<conversion>[#0\-+ ]+)?
3410 (?P<min_width>\d+)?
3411 (?P<precision>\.\d+)?
3412 (?P<len_mod>[hlL])? # unused in python
3413 {1} # conversion type
3414 )
3415 '''
3416
3417
3418 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3419
3420
3421 def limit_length(s, length):
3422 """ Add an ellipsis to overly long strings """
3423 if s is None:
3424 return None
3425 ELLIPSES = '...'
3426 if len(s) > length:
3427 return s[:length - len(ELLIPSES)] + ELLIPSES
3428 return s
3429
3430
3431 def version_tuple(v):
3432 return tuple(int(e) for e in re.split(r'[-.]', v))
3433
3434
3435 def is_outdated_version(version, limit, assume_new=True):
3436 if not version:
3437 return not assume_new
3438 try:
3439 return version_tuple(version) < version_tuple(limit)
3440 except ValueError:
3441 return not assume_new
3442
3443
3444 def ytdl_is_updateable():
3445 """ Returns whether yt-dlp can be updated with -U """
3446
3447 from .update import is_non_updateable
3448
3449 return not is_non_updateable()
3450
3451
3452 def args_to_str(args):
3453 # Get a short string representation for a subprocess command
3454 return ' '.join(compat_shlex_quote(a) for a in args)
3455
3456
3457 def error_to_compat_str(err):
3458 return str(err)
3459
3460
3461 def error_to_str(err):
3462 return f'{type(err).__name__}: {err}'
3463
3464
3465 def mimetype2ext(mt):
3466 if mt is None:
3467 return None
3468
3469 mt, _, params = mt.partition(';')
3470 mt = mt.strip()
3471
3472 FULL_MAP = {
3473 'audio/mp4': 'm4a',
3474 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3475 # as it's the most popular one
3476 'audio/mpeg': 'mp3',
3477 'audio/x-wav': 'wav',
3478 'audio/wav': 'wav',
3479 'audio/wave': 'wav',
3480 }
3481
3482 ext = FULL_MAP.get(mt)
3483 if ext is not None:
3484 return ext
3485
3486 SUBTYPE_MAP = {
3487 '3gpp': '3gp',
3488 'smptett+xml': 'tt',
3489 'ttaf+xml': 'dfxp',
3490 'ttml+xml': 'ttml',
3491 'x-flv': 'flv',
3492 'x-mp4-fragmented': 'mp4',
3493 'x-ms-sami': 'sami',
3494 'x-ms-wmv': 'wmv',
3495 'mpegurl': 'm3u8',
3496 'x-mpegurl': 'm3u8',
3497 'vnd.apple.mpegurl': 'm3u8',
3498 'dash+xml': 'mpd',
3499 'f4m+xml': 'f4m',
3500 'hds+xml': 'f4m',
3501 'vnd.ms-sstr+xml': 'ism',
3502 'quicktime': 'mov',
3503 'mp2t': 'ts',
3504 'x-wav': 'wav',
3505 'filmstrip+json': 'fs',
3506 'svg+xml': 'svg',
3507 }
3508
3509 _, _, subtype = mt.rpartition('/')
3510 ext = SUBTYPE_MAP.get(subtype.lower())
3511 if ext is not None:
3512 return ext
3513
3514 SUFFIX_MAP = {
3515 'json': 'json',
3516 'xml': 'xml',
3517 'zip': 'zip',
3518 'gzip': 'gz',
3519 }
3520
3521 _, _, suffix = subtype.partition('+')
3522 ext = SUFFIX_MAP.get(suffix)
3523 if ext is not None:
3524 return ext
3525
3526 return subtype.replace('+', '.')
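# Illustrative examples (doctest-style sketch):
#   >>> mimetype2ext('audio/mp4')
#   'm4a'
#   >>> mimetype2ext('application/vnd.apple.mpegurl')
#   'm3u8'
#   >>> mimetype2ext('video/mp4; codecs="avc1.42E01E"')
#   'mp4'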
3527
3528
3529 def ext2mimetype(ext_or_url):
3530 if not ext_or_url:
3531 return None
3532 if '.' not in ext_or_url:
3533 ext_or_url = f'file.{ext_or_url}'
3534 return mimetypes.guess_type(ext_or_url)[0]
3535
3536
3537 def parse_codecs(codecs_str):
3538 # http://tools.ietf.org/html/rfc6381
3539 if not codecs_str:
3540 return {}
3541 split_codecs = list(filter(None, map(
3542 str.strip, codecs_str.strip().strip(',').split(','))))
3543 vcodec, acodec, scodec, hdr = None, None, None, None
3544 for full_codec in split_codecs:
3545 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3546 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3547 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3548 if vcodec:
3549 continue
3550 vcodec = full_codec
3551 if parts[0] in ('dvh1', 'dvhe'):
3552 hdr = 'DV'
3553 elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
3554 hdr = 'HDR10'
3555 elif parts[:2] == ['vp9', '2']:
3556 hdr = 'HDR10'
3557 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
3558 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3559 acodec = acodec or full_codec
3560 elif parts[0] in ('stpp', 'wvtt'):
3561 scodec = scodec or full_codec
3562 else:
3563 write_string(f'WARNING: Unknown codec {full_codec}\n')
3564 if vcodec or acodec or scodec:
3565 return {
3566 'vcodec': vcodec or 'none',
3567 'acodec': acodec or 'none',
3568 'dynamic_range': hdr,
3569 **({'scodec': scodec} if scodec is not None else {}),
3570 }
3571 elif len(split_codecs) == 2:
3572 return {
3573 'vcodec': split_codecs[0],
3574 'acodec': split_codecs[1],
3575 }
3576 return {}
3577
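# Illustrative examples (sample RFC 6381 codec strings):
#
#   >>> parse_codecs('avc1.64001F, mp4a.40.2')
#   {'vcodec': 'avc1.64001F', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
#   >>> parse_codecs('dvh1.05.06')  # Dolby Vision implies DV dynamic range
#   {'vcodec': 'dvh1.05.06', 'acodec': 'none', 'dynamic_range': 'DV'}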
3578
3579 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3580 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3581
3582 allow_mkv = not preferences or 'mkv' in preferences
3583
3584 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3585 return 'mkv' # TODO: does any other format allow this?
3586
3587 # TODO: Not all codecs supported by parse_codecs are handled here
3588 COMPATIBLE_CODECS = {
3589 'mp4': {
3590 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
3591 'h264', 'aacl', 'ec-3', # Set in ISM
3592 },
3593 'webm': {
3594 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3595 'vp9x', 'vp8x', # in the webm spec
3596 },
3597 }
3598
3599 sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3600 vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3601
3602 for ext in preferences or COMPATIBLE_CODECS.keys():
3603 codec_set = COMPATIBLE_CODECS.get(ext, set())
3604 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3605 return ext
3606
3607 COMPATIBLE_EXTS = (
3608 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3609 {'webm'},
3610 )
3611 for ext in preferences or vexts:
3612 current_exts = {ext, *vexts, *aexts}
3613 if ext == 'mkv' or current_exts == {ext} or any(
3614 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3615 return ext
3616 return 'mkv' if allow_mkv else preferences[-1]
3617
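# An illustrative call (sample codec/extension lists; the result follows from
# COMPATIBLE_CODECS above):
#
#   >>> get_compatible_ext(vcodecs=['avc1.64001F'], acodecs=['mp4a.40.2'], vexts=['mp4'], aexts=['m4a'])
#   'mp4'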
3618
3619 def urlhandle_detect_ext(url_handle):
3620 getheader = url_handle.headers.get
3621
3622 cd = getheader('Content-Disposition')
3623 if cd:
3624 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3625 if m:
3626 e = determine_ext(m.group('filename'), default_ext=None)
3627 if e:
3628 return e
3629
3630 return mimetype2ext(getheader('Content-Type'))
3631
3632
3633 def encode_data_uri(data, mime_type):
3634 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3635
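# Illustrative example (sample payload):
#
#   >>> encode_data_uri(b'hello', 'text/plain')
#   'data:text/plain;base64,aGVsbG8='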
3636
3637 def age_restricted(content_limit, age_limit):
3638 """ Returns True iff the content should be blocked """
3639
3640 if age_limit is None: # No limit set
3641 return False
3642 if content_limit is None:
3643 return False # Content available for everyone
3644 return age_limit < content_limit
3645
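# Illustrative examples (content_limit is the rating on the content,
# age_limit is the user's configured --age-limit):
#
#   >>> age_restricted(18, 16)  # 18-rated content, user allows up to 16
#   True
#   >>> age_restricted(10, 18)
#   False
#   >>> age_restricted(None, 18)  # unrated content is never blocked
#   False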
3646
3647 # List of known byte-order-marks (BOM)
3648 BOMS = [
3649 (b'\xef\xbb\xbf', 'utf-8'),
3650 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3651 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3652 (b'\xff\xfe', 'utf-16-le'),
3653 (b'\xfe\xff', 'utf-16-be'),
3654 ]
3655
3656
3657 def is_html(first_bytes):
3658 """ Detect whether a file contains HTML by examining its first bytes. """
3659
3660 encoding = 'utf-8'
3661 for bom, enc in BOMS:
3662 while first_bytes.startswith(bom):
3663 encoding, first_bytes = enc, first_bytes[len(bom):]
3664
3665 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3666
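# Illustrative examples (is_html returns a truthy re.Match or None):
#
#   >>> bool(is_html(b'\xef\xbb\xbf<!DOCTYPE html>'))
#   True
#   >>> bool(is_html(b'\x00binary data'))
#   False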
3667
3668 def determine_protocol(info_dict):
3669 protocol = info_dict.get('protocol')
3670 if protocol is not None:
3671 return protocol
3672
3673 url = sanitize_url(info_dict['url'])
3674 if url.startswith('rtmp'):
3675 return 'rtmp'
3676 elif url.startswith('mms'):
3677 return 'mms'
3678 elif url.startswith('rtsp'):
3679 return 'rtsp'
3680
3681 ext = determine_ext(url)
3682 if ext == 'm3u8':
3683 return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
3684 elif ext == 'f4m':
3685 return 'f4m'
3686
3687 return urllib.parse.urlparse(url).scheme
3688
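# Illustrative examples (sample info_dicts; these rely on sanitize_url and
# determine_ext defined earlier in this file):
#
#   >>> determine_protocol({'url': 'rtmp://example.com/live'})
#   'rtmp'
#   >>> determine_protocol({'url': 'https://example.com/master.m3u8'})
#   'm3u8_native'
#   >>> determine_protocol({'url': 'https://example.com/video.mp4'})
#   'https'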
3689
3690 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3691 """ Render a list of rows, each as a list of values.
3692 Text after a \t will be right aligned """
3693 def width(string):
3694 return len(remove_terminal_sequences(string).replace('\t', ''))
3695
3696 def get_max_lens(table):
3697 return [max(width(str(v)) for v in col) for col in zip(*table)]
3698
3699 def filter_using_list(row, filterArray):
3700 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3701
3702 max_lens = get_max_lens(data) if hide_empty else []
3703 header_row = filter_using_list(header_row, max_lens)
3704 data = [filter_using_list(row, max_lens) for row in data]
3705
3706 table = [header_row] + data
3707 max_lens = get_max_lens(table)
3708 extra_gap += 1
3709 if delim:
3710 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3711 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3712 for row in table:
3713 for pos, text in enumerate(map(str, row)):
3714 if '\t' in text:
3715 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3716 else:
3717 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3718 ret = '\n'.join(''.join(row).rstrip() for row in table)
3719 return ret
3720
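# Illustrative example (sample rows; one space of default gap per column):
#
#   >>> print(render_table(['ID', 'NAME'], [['1', 'foo'], ['2', 'bar']]))
#   ID NAME
#   1  foo
#   2  bar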
3721
3722 def _match_one(filter_part, dct, incomplete):
3723 # TODO: Generalize code with YoutubeDL._build_format_filter
3724 STRING_OPERATORS = {
3725 '*=': operator.contains,
3726 '^=': lambda attr, value: attr.startswith(value),
3727 '$=': lambda attr, value: attr.endswith(value),
3728 '~=': lambda attr, value: re.search(value, attr),
3729 }
3730 COMPARISON_OPERATORS = {
3731 **STRING_OPERATORS,
3732 '<=': operator.le, # "<=" must be defined above "<"
3733 '<': operator.lt,
3734 '>=': operator.ge,
3735 '>': operator.gt,
3736 '=': operator.eq,
3737 }
3738
3739 if isinstance(incomplete, bool):
3740 is_incomplete = lambda _: incomplete
3741 else:
3742 is_incomplete = lambda k: k in incomplete
3743
3744 operator_rex = re.compile(r'''(?x)
3745 (?P<key>[a-z_]+)
3746 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3747 (?:
3748 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3749 (?P<strval>.+?)
3750 )
3751 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3752 m = operator_rex.fullmatch(filter_part.strip())
3753 if m:
3754 m = m.groupdict()
3755 unnegated_op = COMPARISON_OPERATORS[m['op']]
3756 if m['negation']:
3757 op = lambda attr, value: not unnegated_op(attr, value)
3758 else:
3759 op = unnegated_op
3760 comparison_value = m['quotedstrval'] or m['strval']
3761 if m['quote']:
3762 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3763 actual_value = dct.get(m['key'])
3764 numeric_comparison = None
3765 if isinstance(actual_value, (int, float)):
3766 # If the original field is a string and the matching comparison value is
3767 # a number, we should respect the origin of the original field
3768 # and process the comparison value as a string (see
3769 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3770 try:
3771 numeric_comparison = int(comparison_value)
3772 except ValueError:
3773 numeric_comparison = parse_filesize(comparison_value)
3774 if numeric_comparison is None:
3775 numeric_comparison = parse_filesize(f'{comparison_value}B')
3776 if numeric_comparison is None:
3777 numeric_comparison = parse_duration(comparison_value)
3778 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3779 raise ValueError('Operator %s only supports string values!' % m['op'])
3780 if actual_value is None:
3781 return is_incomplete(m['key']) or m['none_inclusive']
3782 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3783
3784 UNARY_OPERATORS = {
3785 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3786 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3787 }
3788 operator_rex = re.compile(r'''(?x)
3789 (?P<op>%s)\s*(?P<key>[a-z_]+)
3790 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3791 m = operator_rex.fullmatch(filter_part.strip())
3792 if m:
3793 op = UNARY_OPERATORS[m.group('op')]
3794 actual_value = dct.get(m.group('key'))
3795 if is_incomplete(m.group('key')) and actual_value is None:
3796 return True
3797 return op(actual_value)
3798
3799 raise ValueError('Invalid filter part %r' % filter_part)
3800
3801
3802 def match_str(filter_str, dct, incomplete=False):
3803 """ Filter a dictionary with a simple string syntax.
3804 @returns Whether the filter passes
3805 @param incomplete Set of keys that are expected to be missing from dct.
3806 Can be True/False to indicate all/none of the keys may be missing.
3807 All conditions on incomplete keys pass if the key is missing
3808 """
3809 return all(
3810 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3811 for filter_part in re.split(r'(?<!\\)&', filter_str))
3812
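# Illustrative examples (sample dicts mimicking partial info_dicts):
#
#   >>> match_str('like_count > 100 & duration < 600',
#   ...           {'like_count': 190, 'duration': 300})
#   True
#   >>> match_str('title ~= (?i)error', {'title': 'All good'})
#   False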
3813
3814 def match_filter_func(filters):
3815 if not filters:
3816 return None
3817 filters = set(variadic(filters))
3818
3819 interactive = '-' in filters
3820 if interactive:
3821 filters.remove('-')
3822
3823 def _match_func(info_dict, incomplete=False):
3824 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3825 return NO_DEFAULT if interactive and not incomplete else None
3826 else:
3827 video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3828 filter_str = ') | ('.join(map(str.strip, filters))
3829 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3830 return _match_func
3831
3832
3833 class download_range_func:
3834 def __init__(self, chapters, ranges):
3835 self.chapters, self.ranges = chapters, ranges
3836
3837 def __call__(self, info_dict, ydl):
3838 if not self.ranges and not self.chapters:
3839 yield {}
3840
3841 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3842 else 'Cannot match chapters since chapter information is unavailable')
3843 for regex in self.chapters or []:
3844 for i, chapter in enumerate(info_dict.get('chapters') or []):
3845 if re.search(regex, chapter['title']):
3846 warning = None
3847 yield {**chapter, 'index': i}
3848 if self.chapters and warning:
3849 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3850
3851 yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3852
3853 def __eq__(self, other):
3854 return (isinstance(other, download_range_func)
3855 and self.chapters == other.chapters and self.ranges == other.ranges)
3856
3857
3858 def parse_dfxp_time_expr(time_expr):
3859 if not time_expr:
3860 return
3861
3862 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3863 if mobj:
3864 return float(mobj.group('time_offset'))
3865
3866 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3867 if mobj:
3868 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3869
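# Illustrative examples (these rely on NUMBER_RE defined earlier in this file):
#
#   >>> parse_dfxp_time_expr('1.5s')
#   1.5
#   >>> parse_dfxp_time_expr('00:01:30.5')
#   90.5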
3870
3871 def srt_subtitles_timecode(seconds):
3872 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3873
3874
3875 def ass_subtitles_timecode(seconds):
3876 time = timetuple_from_msec(seconds * 1000)
3877 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3878
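# Illustrative examples, assuming timetuple_from_msec (defined earlier in this
# file) splits milliseconds into (hours, minutes, seconds, milliseconds):
#
#   >>> srt_subtitles_timecode(61.5)
#   '00:01:01,500'
#   >>> ass_subtitles_timecode(61.5)
#   '0:01:01.50'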
3879
3880 def dfxp2srt(dfxp_data):
3881 '''
3882 @param dfxp_data A bytes-like object containing DFXP data
3883 @returns A unicode object containing converted SRT data
3884 '''
3885 LEGACY_NAMESPACES = (
3886 (b'http://www.w3.org/ns/ttml', [
3887 b'http://www.w3.org/2004/11/ttaf1',
3888 b'http://www.w3.org/2006/04/ttaf1',
3889 b'http://www.w3.org/2006/10/ttaf1',
3890 ]),
3891 (b'http://www.w3.org/ns/ttml#styling', [
3892 b'http://www.w3.org/ns/ttml#style',
3893 ]),
3894 )
3895
3896 SUPPORTED_STYLING = [
3897 'color',
3898 'fontFamily',
3899 'fontSize',
3900 'fontStyle',
3901 'fontWeight',
3902 'textDecoration'
3903 ]
3904
3905 _x = functools.partial(xpath_with_ns, ns_map={
3906 'xml': 'http://www.w3.org/XML/1998/namespace',
3907 'ttml': 'http://www.w3.org/ns/ttml',
3908 'tts': 'http://www.w3.org/ns/ttml#styling',
3909 })
3910
3911 styles = {}
3912 default_style = {}
3913
3914 class TTMLPElementParser:
3915 _out = ''
3916 _unclosed_elements = []
3917 _applied_styles = []
3918
3919 def start(self, tag, attrib):
3920 if tag in (_x('ttml:br'), 'br'):
3921 self._out += '\n'
3922 else:
3923 unclosed_elements = []
3924 style = {}
3925 element_style_id = attrib.get('style')
3926 if default_style:
3927 style.update(default_style)
3928 if element_style_id:
3929 style.update(styles.get(element_style_id, {}))
3930 for prop in SUPPORTED_STYLING:
3931 prop_val = attrib.get(_x('tts:' + prop))
3932 if prop_val:
3933 style[prop] = prop_val
3934 if style:
3935 font = ''
3936 for k, v in sorted(style.items()):
3937 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3938 continue
3939 if k == 'color':
3940 font += ' color="%s"' % v
3941 elif k == 'fontSize':
3942 font += ' size="%s"' % v
3943 elif k == 'fontFamily':
3944 font += ' face="%s"' % v
3945 elif k == 'fontWeight' and v == 'bold':
3946 self._out += '<b>'
3947 unclosed_elements.append('b')
3948 elif k == 'fontStyle' and v == 'italic':
3949 self._out += '<i>'
3950 unclosed_elements.append('i')
3951 elif k == 'textDecoration' and v == 'underline':
3952 self._out += '<u>'
3953 unclosed_elements.append('u')
3954 if font:
3955 self._out += '<font' + font + '>'
3956 unclosed_elements.append('font')
3957 applied_style = {}
3958 if self._applied_styles:
3959 applied_style.update(self._applied_styles[-1])
3960 applied_style.update(style)
3961 self._applied_styles.append(applied_style)
3962 self._unclosed_elements.append(unclosed_elements)
3963
3964 def end(self, tag):
3965 if tag not in (_x('ttml:br'), 'br'):
3966 unclosed_elements = self._unclosed_elements.pop()
3967 for element in reversed(unclosed_elements):
3968 self._out += '</%s>' % element
3969 if unclosed_elements and self._applied_styles:
3970 self._applied_styles.pop()
3971
3972 def data(self, data):
3973 self._out += data
3974
3975 def close(self):
3976 return self._out.strip()
3977
3978 def parse_node(node):
3979 target = TTMLPElementParser()
3980 parser = xml.etree.ElementTree.XMLParser(target=target)
3981 parser.feed(xml.etree.ElementTree.tostring(node))
3982 return parser.close()
3983
3984 for k, v in LEGACY_NAMESPACES:
3985 for ns in v:
3986 dfxp_data = dfxp_data.replace(ns, k)
3987
3988 dfxp = compat_etree_fromstring(dfxp_data)
3989 out = []
3990 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3991
3992 if not paras:
3993 raise ValueError('Invalid dfxp/TTML subtitle')
3994
3995 repeat = False
3996 while True:
3997 for style in dfxp.findall(_x('.//ttml:style')):
3998 style_id = style.get('id') or style.get(_x('xml:id'))
3999 if not style_id:
4000 continue
4001 parent_style_id = style.get('style')
4002 if parent_style_id:
4003 if parent_style_id not in styles:
4004 repeat = True
4005 continue
4006 styles[style_id] = styles[parent_style_id].copy()
4007 for prop in SUPPORTED_STYLING:
4008 prop_val = style.get(_x('tts:' + prop))
4009 if prop_val:
4010 styles.setdefault(style_id, {})[prop] = prop_val
4011 if repeat:
4012 repeat = False
4013 else:
4014 break
4015
4016 for p in ('body', 'div'):
4017 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4018 if ele is None:
4019 continue
4020 style = styles.get(ele.get('style'))
4021 if not style:
4022 continue
4023 default_style.update(style)
4024
4025 for para, index in zip(paras, itertools.count(1)):
4026 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4027 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4028 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4029 if begin_time is None:
4030 continue
4031 if not end_time:
4032 if not dur:
4033 continue
4034 end_time = begin_time + dur
4035 out.append('%d\n%s --> %s\n%s\n\n' % (
4036 index,
4037 srt_subtitles_timecode(begin_time),
4038 srt_subtitles_timecode(end_time),
4039 parse_node(para)))
4040
4041 return ''.join(out)
4042
4043
4044 def cli_option(params, command_option, param, separator=None):
4045 param = params.get(param)
4046 return ([] if param is None
4047 else [command_option, str(param)] if separator is None
4048 else [f'{command_option}{separator}{param}'])
4049
4050
4051 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4052 param = params.get(param)
4053 assert param in (True, False, None)
4054 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
4055
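# Illustrative examples (sample params dicts). Note that cli_bool_option
# reuses cli_option by passing a {True: ..., False: ...} mapping as `params`:
#
#   >>> cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', '127.0.0.1:3128']
#   >>> cli_option({'proxy': None}, '--proxy', 'proxy')
#   []
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']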
4056
4057 def cli_valueless_option(params, command_option, param, expected_value=True):
4058 return [command_option] if params.get(param) == expected_value else []
4059
4060
4061 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4062 if isinstance(argdict, (list, tuple)): # for backward compatibility
4063 if use_compat:
4064 return argdict
4065 else:
4066 argdict = None
4067 if argdict is None:
4068 return default
4069 assert isinstance(argdict, dict)
4070
4071 assert isinstance(keys, (list, tuple))
4072 for key_list in keys:
4073 arg_list = list(filter(
4074 lambda x: x is not None,
4075 [argdict.get(key.lower()) for key in variadic(key_list)]))
4076 if arg_list:
4077 return [arg for args in arg_list for arg in args]
4078 return default
4079
4080
4081 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
4082 main_key, exe = main_key.lower(), exe.lower()
4083 root_key = exe if main_key == exe else f'{main_key}+{exe}'
4084 keys = [f'{root_key}{k}' for k in (keys or [''])]
4085 if root_key in keys:
4086 if main_key != exe:
4087 keys.append((main_key, exe))
4088 keys.append('default')
4089 else:
4090 use_compat = False
4091 return cli_configuration_args(argdict, keys, default, use_compat)
4092
4093
4094 class ISO639Utils:
4095 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4096 _lang_map = {
4097 'aa': 'aar',
4098 'ab': 'abk',
4099 'ae': 'ave',
4100 'af': 'afr',
4101 'ak': 'aka',
4102 'am': 'amh',
4103 'an': 'arg',
4104 'ar': 'ara',
4105 'as': 'asm',
4106 'av': 'ava',
4107 'ay': 'aym',
4108 'az': 'aze',
4109 'ba': 'bak',
4110 'be': 'bel',
4111 'bg': 'bul',
4112 'bh': 'bih',
4113 'bi': 'bis',
4114 'bm': 'bam',
4115 'bn': 'ben',
4116 'bo': 'bod',
4117 'br': 'bre',
4118 'bs': 'bos',
4119 'ca': 'cat',
4120 'ce': 'che',
4121 'ch': 'cha',
4122 'co': 'cos',
4123 'cr': 'cre',
4124 'cs': 'ces',
4125 'cu': 'chu',
4126 'cv': 'chv',
4127 'cy': 'cym',
4128 'da': 'dan',
4129 'de': 'deu',
4130 'dv': 'div',
4131 'dz': 'dzo',
4132 'ee': 'ewe',
4133 'el': 'ell',
4134 'en': 'eng',
4135 'eo': 'epo',
4136 'es': 'spa',
4137 'et': 'est',
4138 'eu': 'eus',
4139 'fa': 'fas',
4140 'ff': 'ful',
4141 'fi': 'fin',
4142 'fj': 'fij',
4143 'fo': 'fao',
4144 'fr': 'fra',
4145 'fy': 'fry',
4146 'ga': 'gle',
4147 'gd': 'gla',
4148 'gl': 'glg',
4149 'gn': 'grn',
4150 'gu': 'guj',
4151 'gv': 'glv',
4152 'ha': 'hau',
4153 'he': 'heb',
4154 'iw': 'heb', # Replaced by he in 1989 revision
4155 'hi': 'hin',
4156 'ho': 'hmo',
4157 'hr': 'hrv',
4158 'ht': 'hat',
4159 'hu': 'hun',
4160 'hy': 'hye',
4161 'hz': 'her',
4162 'ia': 'ina',
4163 'id': 'ind',
4164 'in': 'ind', # Replaced by id in 1989 revision
4165 'ie': 'ile',
4166 'ig': 'ibo',
4167 'ii': 'iii',
4168 'ik': 'ipk',
4169 'io': 'ido',
4170 'is': 'isl',
4171 'it': 'ita',
4172 'iu': 'iku',
4173 'ja': 'jpn',
4174 'jv': 'jav',
4175 'ka': 'kat',
4176 'kg': 'kon',
4177 'ki': 'kik',
4178 'kj': 'kua',
4179 'kk': 'kaz',
4180 'kl': 'kal',
4181 'km': 'khm',
4182 'kn': 'kan',
4183 'ko': 'kor',
4184 'kr': 'kau',
4185 'ks': 'kas',
4186 'ku': 'kur',
4187 'kv': 'kom',
4188 'kw': 'cor',
4189 'ky': 'kir',
4190 'la': 'lat',
4191 'lb': 'ltz',
4192 'lg': 'lug',
4193 'li': 'lim',
4194 'ln': 'lin',
4195 'lo': 'lao',
4196 'lt': 'lit',
4197 'lu': 'lub',
4198 'lv': 'lav',
4199 'mg': 'mlg',
4200 'mh': 'mah',
4201 'mi': 'mri',
4202 'mk': 'mkd',
4203 'ml': 'mal',
4204 'mn': 'mon',
4205 'mr': 'mar',
4206 'ms': 'msa',
4207 'mt': 'mlt',
4208 'my': 'mya',
4209 'na': 'nau',
4210 'nb': 'nob',
4211 'nd': 'nde',
4212 'ne': 'nep',
4213 'ng': 'ndo',
4214 'nl': 'nld',
4215 'nn': 'nno',
4216 'no': 'nor',
4217 'nr': 'nbl',
4218 'nv': 'nav',
4219 'ny': 'nya',
4220 'oc': 'oci',
4221 'oj': 'oji',
4222 'om': 'orm',
4223 'or': 'ori',
4224 'os': 'oss',
4225 'pa': 'pan',
4226 'pi': 'pli',
4227 'pl': 'pol',
4228 'ps': 'pus',
4229 'pt': 'por',
4230 'qu': 'que',
4231 'rm': 'roh',
4232 'rn': 'run',
4233 'ro': 'ron',
4234 'ru': 'rus',
4235 'rw': 'kin',
4236 'sa': 'san',
4237 'sc': 'srd',
4238 'sd': 'snd',
4239 'se': 'sme',
4240 'sg': 'sag',
4241 'si': 'sin',
4242 'sk': 'slk',
4243 'sl': 'slv',
4244 'sm': 'smo',
4245 'sn': 'sna',
4246 'so': 'som',
4247 'sq': 'sqi',
4248 'sr': 'srp',
4249 'ss': 'ssw',
4250 'st': 'sot',
4251 'su': 'sun',
4252 'sv': 'swe',
4253 'sw': 'swa',
4254 'ta': 'tam',
4255 'te': 'tel',
4256 'tg': 'tgk',
4257 'th': 'tha',
4258 'ti': 'tir',
4259 'tk': 'tuk',
4260 'tl': 'tgl',
4261 'tn': 'tsn',
4262 'to': 'ton',
4263 'tr': 'tur',
4264 'ts': 'tso',
4265 'tt': 'tat',
4266 'tw': 'twi',
4267 'ty': 'tah',
4268 'ug': 'uig',
4269 'uk': 'ukr',
4270 'ur': 'urd',
4271 'uz': 'uzb',
4272 've': 'ven',
4273 'vi': 'vie',
4274 'vo': 'vol',
4275 'wa': 'wln',
4276 'wo': 'wol',
4277 'xh': 'xho',
4278 'yi': 'yid',
4279 'ji': 'yid', # Replaced by yi in 1989 revision
4280 'yo': 'yor',
4281 'za': 'zha',
4282 'zh': 'zho',
4283 'zu': 'zul',
4284 }
4285
4286 @classmethod
4287 def short2long(cls, code):
4288 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4289 return cls._lang_map.get(code[:2])
4290
4291 @classmethod
4292 def long2short(cls, code):
4293 """Convert language code from ISO 639-2/T to ISO 639-1"""
4294 for short_name, long_name in cls._lang_map.items():
4295 if long_name == code:
4296 return short_name
4297
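# Illustrative examples:
#
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.short2long('en-US')  # only the first two characters are used
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'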
4298
4299 class ISO3166Utils:
4300 # From http://data.okfn.org/data/core/country-list
4301 _country_map = {
4302 'AF': 'Afghanistan',
4303 'AX': 'Åland Islands',
4304 'AL': 'Albania',
4305 'DZ': 'Algeria',
4306 'AS': 'American Samoa',
4307 'AD': 'Andorra',
4308 'AO': 'Angola',
4309 'AI': 'Anguilla',
4310 'AQ': 'Antarctica',
4311 'AG': 'Antigua and Barbuda',
4312 'AR': 'Argentina',
4313 'AM': 'Armenia',
4314 'AW': 'Aruba',
4315 'AU': 'Australia',
4316 'AT': 'Austria',
4317 'AZ': 'Azerbaijan',
4318 'BS': 'Bahamas',
4319 'BH': 'Bahrain',
4320 'BD': 'Bangladesh',
4321 'BB': 'Barbados',
4322 'BY': 'Belarus',
4323 'BE': 'Belgium',
4324 'BZ': 'Belize',
4325 'BJ': 'Benin',
4326 'BM': 'Bermuda',
4327 'BT': 'Bhutan',
4328 'BO': 'Bolivia, Plurinational State of',
4329 'BQ': 'Bonaire, Sint Eustatius and Saba',
4330 'BA': 'Bosnia and Herzegovina',
4331 'BW': 'Botswana',
4332 'BV': 'Bouvet Island',
4333 'BR': 'Brazil',
4334 'IO': 'British Indian Ocean Territory',
4335 'BN': 'Brunei Darussalam',
4336 'BG': 'Bulgaria',
4337 'BF': 'Burkina Faso',
4338 'BI': 'Burundi',
4339 'KH': 'Cambodia',
4340 'CM': 'Cameroon',
4341 'CA': 'Canada',
4342 'CV': 'Cape Verde',
4343 'KY': 'Cayman Islands',
4344 'CF': 'Central African Republic',
4345 'TD': 'Chad',
4346 'CL': 'Chile',
4347 'CN': 'China',
4348 'CX': 'Christmas Island',
4349 'CC': 'Cocos (Keeling) Islands',
4350 'CO': 'Colombia',
4351 'KM': 'Comoros',
4352 'CG': 'Congo',
4353 'CD': 'Congo, the Democratic Republic of the',
4354 'CK': 'Cook Islands',
4355 'CR': 'Costa Rica',
4356 'CI': 'Côte d\'Ivoire',
4357 'HR': 'Croatia',
4358 'CU': 'Cuba',
4359 'CW': 'Curaçao',
4360 'CY': 'Cyprus',
4361 'CZ': 'Czech Republic',
4362 'DK': 'Denmark',
4363 'DJ': 'Djibouti',
4364 'DM': 'Dominica',
4365 'DO': 'Dominican Republic',
4366 'EC': 'Ecuador',
4367 'EG': 'Egypt',
4368 'SV': 'El Salvador',
4369 'GQ': 'Equatorial Guinea',
4370 'ER': 'Eritrea',
4371 'EE': 'Estonia',
4372 'ET': 'Ethiopia',
4373 'FK': 'Falkland Islands (Malvinas)',
4374 'FO': 'Faroe Islands',
4375 'FJ': 'Fiji',
4376 'FI': 'Finland',
4377 'FR': 'France',
4378 'GF': 'French Guiana',
4379 'PF': 'French Polynesia',
4380 'TF': 'French Southern Territories',
4381 'GA': 'Gabon',
4382 'GM': 'Gambia',
4383 'GE': 'Georgia',
4384 'DE': 'Germany',
4385 'GH': 'Ghana',
4386 'GI': 'Gibraltar',
4387 'GR': 'Greece',
4388 'GL': 'Greenland',
4389 'GD': 'Grenada',
4390 'GP': 'Guadeloupe',
4391 'GU': 'Guam',
4392 'GT': 'Guatemala',
4393 'GG': 'Guernsey',
4394 'GN': 'Guinea',
4395 'GW': 'Guinea-Bissau',
4396 'GY': 'Guyana',
4397 'HT': 'Haiti',
4398 'HM': 'Heard Island and McDonald Islands',
4399 'VA': 'Holy See (Vatican City State)',
4400 'HN': 'Honduras',
4401 'HK': 'Hong Kong',
4402 'HU': 'Hungary',
4403 'IS': 'Iceland',
4404 'IN': 'India',
4405 'ID': 'Indonesia',
4406 'IR': 'Iran, Islamic Republic of',
4407 'IQ': 'Iraq',
4408 'IE': 'Ireland',
4409 'IM': 'Isle of Man',
4410 'IL': 'Israel',
4411 'IT': 'Italy',
4412 'JM': 'Jamaica',
4413 'JP': 'Japan',
4414 'JE': 'Jersey',
4415 'JO': 'Jordan',
4416 'KZ': 'Kazakhstan',
4417 'KE': 'Kenya',
4418 'KI': 'Kiribati',
4419 'KP': 'Korea, Democratic People\'s Republic of',
4420 'KR': 'Korea, Republic of',
4421 'KW': 'Kuwait',
4422 'KG': 'Kyrgyzstan',
4423 'LA': 'Lao People\'s Democratic Republic',
4424 'LV': 'Latvia',
4425 'LB': 'Lebanon',
4426 'LS': 'Lesotho',
4427 'LR': 'Liberia',
4428 'LY': 'Libya',
4429 'LI': 'Liechtenstein',
4430 'LT': 'Lithuania',
4431 'LU': 'Luxembourg',
4432 'MO': 'Macao',
4433 'MK': 'Macedonia, the Former Yugoslav Republic of',
4434 'MG': 'Madagascar',
4435 'MW': 'Malawi',
4436 'MY': 'Malaysia',
4437 'MV': 'Maldives',
4438 'ML': 'Mali',
4439 'MT': 'Malta',
4440 'MH': 'Marshall Islands',
4441 'MQ': 'Martinique',
4442 'MR': 'Mauritania',
4443 'MU': 'Mauritius',
4444 'YT': 'Mayotte',
4445 'MX': 'Mexico',
4446 'FM': 'Micronesia, Federated States of',
4447 'MD': 'Moldova, Republic of',
4448 'MC': 'Monaco',
4449 'MN': 'Mongolia',
4450 'ME': 'Montenegro',
4451 'MS': 'Montserrat',
4452 'MA': 'Morocco',
4453 'MZ': 'Mozambique',
4454 'MM': 'Myanmar',
4455 'NA': 'Namibia',
4456 'NR': 'Nauru',
4457 'NP': 'Nepal',
4458 'NL': 'Netherlands',
4459 'NC': 'New Caledonia',
4460 'NZ': 'New Zealand',
4461 'NI': 'Nicaragua',
4462 'NE': 'Niger',
4463 'NG': 'Nigeria',
4464 'NU': 'Niue',
4465 'NF': 'Norfolk Island',
4466 'MP': 'Northern Mariana Islands',
4467 'NO': 'Norway',
4468 'OM': 'Oman',
4469 'PK': 'Pakistan',
4470 'PW': 'Palau',
4471 'PS': 'Palestine, State of',
4472 'PA': 'Panama',
4473 'PG': 'Papua New Guinea',
4474 'PY': 'Paraguay',
4475 'PE': 'Peru',
4476 'PH': 'Philippines',
4477 'PN': 'Pitcairn',
4478 'PL': 'Poland',
4479 'PT': 'Portugal',
4480 'PR': 'Puerto Rico',
4481 'QA': 'Qatar',
4482 'RE': 'Réunion',
4483 'RO': 'Romania',
4484 'RU': 'Russian Federation',
4485 'RW': 'Rwanda',
4486 'BL': 'Saint Barthélemy',
4487 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4488 'KN': 'Saint Kitts and Nevis',
4489 'LC': 'Saint Lucia',
4490 'MF': 'Saint Martin (French part)',
4491 'PM': 'Saint Pierre and Miquelon',
4492 'VC': 'Saint Vincent and the Grenadines',
4493 'WS': 'Samoa',
4494 'SM': 'San Marino',
4495 'ST': 'Sao Tome and Principe',
4496 'SA': 'Saudi Arabia',
4497 'SN': 'Senegal',
4498 'RS': 'Serbia',
4499 'SC': 'Seychelles',
4500 'SL': 'Sierra Leone',
4501 'SG': 'Singapore',
4502 'SX': 'Sint Maarten (Dutch part)',
4503 'SK': 'Slovakia',
4504 'SI': 'Slovenia',
4505 'SB': 'Solomon Islands',
4506 'SO': 'Somalia',
4507 'ZA': 'South Africa',
4508 'GS': 'South Georgia and the South Sandwich Islands',
4509 'SS': 'South Sudan',
4510 'ES': 'Spain',
4511 'LK': 'Sri Lanka',
4512 'SD': 'Sudan',
4513 'SR': 'Suriname',
4514 'SJ': 'Svalbard and Jan Mayen',
4515 'SZ': 'Swaziland',
4516 'SE': 'Sweden',
4517 'CH': 'Switzerland',
4518 'SY': 'Syrian Arab Republic',
4519 'TW': 'Taiwan, Province of China',
4520 'TJ': 'Tajikistan',
4521 'TZ': 'Tanzania, United Republic of',
4522 'TH': 'Thailand',
4523 'TL': 'Timor-Leste',
4524 'TG': 'Togo',
4525 'TK': 'Tokelau',
4526 'TO': 'Tonga',
4527 'TT': 'Trinidad and Tobago',
4528 'TN': 'Tunisia',
4529 'TR': 'Turkey',
4530 'TM': 'Turkmenistan',
4531 'TC': 'Turks and Caicos Islands',
4532 'TV': 'Tuvalu',
4533 'UG': 'Uganda',
4534 'UA': 'Ukraine',
4535 'AE': 'United Arab Emirates',
4536 'GB': 'United Kingdom',
4537 'US': 'United States',
4538 'UM': 'United States Minor Outlying Islands',
4539 'UY': 'Uruguay',
4540 'UZ': 'Uzbekistan',
4541 'VU': 'Vanuatu',
4542 'VE': 'Venezuela, Bolivarian Republic of',
4543 'VN': 'Viet Nam',
4544 'VG': 'Virgin Islands, British',
4545 'VI': 'Virgin Islands, U.S.',
4546 'WF': 'Wallis and Futuna',
4547 'EH': 'Western Sahara',
4548 'YE': 'Yemen',
4549 'ZM': 'Zambia',
4550 'ZW': 'Zimbabwe',
4551 # Not ISO 3166 codes, but used for IP blocks
4552 'AP': 'Asia/Pacific Region',
4553 'EU': 'Europe',
4554 }
4555
4556 @classmethod
4557 def short2full(cls, code):
4558 """Convert an ISO 3166-2 country code to the corresponding full name"""
4559 return cls._country_map.get(code.upper())
4560
4561
4562 class GeoUtils:
4563 # Major IPv4 address blocks per country
4564 _country_ip_map = {
4565 'AD': '46.172.224.0/19',
4566 'AE': '94.200.0.0/13',
4567 'AF': '149.54.0.0/17',
4568 'AG': '209.59.64.0/18',
4569 'AI': '204.14.248.0/21',
4570 'AL': '46.99.0.0/16',
4571 'AM': '46.70.0.0/15',
4572 'AO': '105.168.0.0/13',
4573 'AP': '182.50.184.0/21',
4574 'AQ': '23.154.160.0/24',
4575 'AR': '181.0.0.0/12',
4576 'AS': '202.70.112.0/20',
4577 'AT': '77.116.0.0/14',
4578 'AU': '1.128.0.0/11',
4579 'AW': '181.41.0.0/18',
4580 'AX': '185.217.4.0/22',
4581 'AZ': '5.197.0.0/16',
4582 'BA': '31.176.128.0/17',
4583 'BB': '65.48.128.0/17',
4584 'BD': '114.130.0.0/16',
4585 'BE': '57.0.0.0/8',
4586 'BF': '102.178.0.0/15',
4587 'BG': '95.42.0.0/15',
4588 'BH': '37.131.0.0/17',
4589 'BI': '154.117.192.0/18',
4590 'BJ': '137.255.0.0/16',
4591 'BL': '185.212.72.0/23',
4592 'BM': '196.12.64.0/18',
4593 'BN': '156.31.0.0/16',
4594 'BO': '161.56.0.0/16',
4595 'BQ': '161.0.80.0/20',
4596 'BR': '191.128.0.0/12',
4597 'BS': '24.51.64.0/18',
4598 'BT': '119.2.96.0/19',
4599 'BW': '168.167.0.0/16',
4600 'BY': '178.120.0.0/13',
4601 'BZ': '179.42.192.0/18',
4602 'CA': '99.224.0.0/11',
4603 'CD': '41.243.0.0/16',
4604 'CF': '197.242.176.0/21',
4605 'CG': '160.113.0.0/16',
4606 'CH': '85.0.0.0/13',
4607 'CI': '102.136.0.0/14',
4608 'CK': '202.65.32.0/19',
4609 'CL': '152.172.0.0/14',
4610 'CM': '102.244.0.0/14',
4611 'CN': '36.128.0.0/10',
4612 'CO': '181.240.0.0/12',
4613 'CR': '201.192.0.0/12',
4614 'CU': '152.206.0.0/15',
4615 'CV': '165.90.96.0/19',
4616 'CW': '190.88.128.0/17',
4617 'CY': '31.153.0.0/16',
4618 'CZ': '88.100.0.0/14',
4619 'DE': '53.0.0.0/8',
4620 'DJ': '197.241.0.0/17',
4621 'DK': '87.48.0.0/12',
4622 'DM': '192.243.48.0/20',
4623 'DO': '152.166.0.0/15',
4624 'DZ': '41.96.0.0/12',
4625 'EC': '186.68.0.0/15',
4626 'EE': '90.190.0.0/15',
4627 'EG': '156.160.0.0/11',
4628 'ER': '196.200.96.0/20',
4629 'ES': '88.0.0.0/11',
4630 'ET': '196.188.0.0/14',
4631 'EU': '2.16.0.0/13',
4632 'FI': '91.152.0.0/13',
4633 'FJ': '144.120.0.0/16',
4634 'FK': '80.73.208.0/21',
4635 'FM': '119.252.112.0/20',
4636 'FO': '88.85.32.0/19',
4637 'FR': '90.0.0.0/9',
4638 'GA': '41.158.0.0/15',
4639 'GB': '25.0.0.0/8',
4640 'GD': '74.122.88.0/21',
4641 'GE': '31.146.0.0/16',
4642 'GF': '161.22.64.0/18',
4643 'GG': '62.68.160.0/19',
4644 'GH': '154.160.0.0/12',
4645 'GI': '95.164.0.0/16',
4646 'GL': '88.83.0.0/19',
4647 'GM': '160.182.0.0/15',
4648 'GN': '197.149.192.0/18',
4649 'GP': '104.250.0.0/19',
4650 'GQ': '105.235.224.0/20',
4651 'GR': '94.64.0.0/13',
4652 'GT': '168.234.0.0/16',
4653 'GU': '168.123.0.0/16',
4654 'GW': '197.214.80.0/20',
4655 'GY': '181.41.64.0/18',
4656 'HK': '113.252.0.0/14',
4657 'HN': '181.210.0.0/16',
4658 'HR': '93.136.0.0/13',
4659 'HT': '148.102.128.0/17',
4660 'HU': '84.0.0.0/14',
4661 'ID': '39.192.0.0/10',
4662 'IE': '87.32.0.0/12',
4663 'IL': '79.176.0.0/13',
4664 'IM': '5.62.80.0/20',
4665 'IN': '117.192.0.0/10',
4666 'IO': '203.83.48.0/21',
4667 'IQ': '37.236.0.0/14',
4668 'IR': '2.176.0.0/12',
4669 'IS': '82.221.0.0/16',
4670 'IT': '79.0.0.0/10',
4671 'JE': '87.244.64.0/18',
4672 'JM': '72.27.0.0/17',
4673 'JO': '176.29.0.0/16',
4674 'JP': '133.0.0.0/8',
4675 'KE': '105.48.0.0/12',
4676 'KG': '158.181.128.0/17',
4677 'KH': '36.37.128.0/17',
4678 'KI': '103.25.140.0/22',
4679 'KM': '197.255.224.0/20',
4680 'KN': '198.167.192.0/19',
4681 'KP': '175.45.176.0/22',
4682 'KR': '175.192.0.0/10',
4683 'KW': '37.36.0.0/14',
4684 'KY': '64.96.0.0/15',
4685 'KZ': '2.72.0.0/13',
4686 'LA': '115.84.64.0/18',
4687 'LB': '178.135.0.0/16',
4688 'LC': '24.92.144.0/20',
4689 'LI': '82.117.0.0/19',
4690 'LK': '112.134.0.0/15',
4691 'LR': '102.183.0.0/16',
4692 'LS': '129.232.0.0/17',
4693 'LT': '78.56.0.0/13',
4694 'LU': '188.42.0.0/16',
4695 'LV': '46.109.0.0/16',
4696 'LY': '41.252.0.0/14',
4697 'MA': '105.128.0.0/11',
4698 'MC': '88.209.64.0/18',
4699 'MD': '37.246.0.0/16',
4700 'ME': '178.175.0.0/17',
4701 'MF': '74.112.232.0/21',
4702 'MG': '154.126.0.0/17',
4703 'MH': '117.103.88.0/21',
4704 'MK': '77.28.0.0/15',
4705 'ML': '154.118.128.0/18',
4706 'MM': '37.111.0.0/17',
4707 'MN': '49.0.128.0/17',
4708 'MO': '60.246.0.0/16',
4709 'MP': '202.88.64.0/20',
4710 'MQ': '109.203.224.0/19',
4711 'MR': '41.188.64.0/18',
4712 'MS': '208.90.112.0/22',
4713 'MT': '46.11.0.0/16',
4714 'MU': '105.16.0.0/12',
4715 'MV': '27.114.128.0/18',
4716 'MW': '102.70.0.0/15',
4717 'MX': '187.192.0.0/11',
4718 'MY': '175.136.0.0/13',
4719 'MZ': '197.218.0.0/15',
4720 'NA': '41.182.0.0/16',
4721 'NC': '101.101.0.0/18',
4722 'NE': '197.214.0.0/18',
4723 'NF': '203.17.240.0/22',
4724 'NG': '105.112.0.0/12',
4725 'NI': '186.76.0.0/15',
4726 'NL': '145.96.0.0/11',
4727 'NO': '84.208.0.0/13',
4728 'NP': '36.252.0.0/15',
4729 'NR': '203.98.224.0/19',
4730 'NU': '49.156.48.0/22',
4731 'NZ': '49.224.0.0/14',
4732 'OM': '5.36.0.0/15',
4733 'PA': '186.72.0.0/15',
4734 'PE': '186.160.0.0/14',
4735 'PF': '123.50.64.0/18',
4736 'PG': '124.240.192.0/19',
4737 'PH': '49.144.0.0/13',
4738 'PK': '39.32.0.0/11',
4739 'PL': '83.0.0.0/11',
4740 'PM': '70.36.0.0/20',
4741 'PR': '66.50.0.0/16',
4742 'PS': '188.161.0.0/16',
4743 'PT': '85.240.0.0/13',
4744 'PW': '202.124.224.0/20',
4745 'PY': '181.120.0.0/14',
4746 'QA': '37.210.0.0/15',
4747 'RE': '102.35.0.0/16',
4748 'RO': '79.112.0.0/13',
4749 'RS': '93.86.0.0/15',
4750 'RU': '5.136.0.0/13',
4751 'RW': '41.186.0.0/16',
4752 'SA': '188.48.0.0/13',
4753 'SB': '202.1.160.0/19',
4754 'SC': '154.192.0.0/11',
4755 'SD': '102.120.0.0/13',
4756 'SE': '78.64.0.0/12',
4757 'SG': '8.128.0.0/10',
4758 'SI': '188.196.0.0/14',
4759 'SK': '78.98.0.0/15',
4760 'SL': '102.143.0.0/17',
4761 'SM': '89.186.32.0/19',
4762 'SN': '41.82.0.0/15',
4763 'SO': '154.115.192.0/18',
4764 'SR': '186.179.128.0/17',
4765 'SS': '105.235.208.0/21',
4766 'ST': '197.159.160.0/19',
4767 'SV': '168.243.0.0/16',
4768 'SX': '190.102.0.0/20',
4769 'SY': '5.0.0.0/16',
4770 'SZ': '41.84.224.0/19',
4771 'TC': '65.255.48.0/20',
4772 'TD': '154.68.128.0/19',
4773 'TG': '196.168.0.0/14',
4774 'TH': '171.96.0.0/13',
4775 'TJ': '85.9.128.0/18',
4776 'TK': '27.96.24.0/21',
4777 'TL': '180.189.160.0/20',
4778 'TM': '95.85.96.0/19',
4779 'TN': '197.0.0.0/11',
4780 'TO': '175.176.144.0/21',
4781 'TR': '78.160.0.0/11',
4782 'TT': '186.44.0.0/15',
4783 'TV': '202.2.96.0/19',
4784 'TW': '120.96.0.0/11',
4785 'TZ': '156.156.0.0/14',
4786 'UA': '37.52.0.0/14',
4787 'UG': '102.80.0.0/13',
4788 'US': '6.0.0.0/8',
4789 'UY': '167.56.0.0/13',
4790 'UZ': '84.54.64.0/18',
4791 'VA': '212.77.0.0/19',
4792 'VC': '207.191.240.0/21',
4793 'VE': '186.88.0.0/13',
4794 'VG': '66.81.192.0/20',
4795 'VI': '146.226.0.0/16',
4796 'VN': '14.160.0.0/11',
4797 'VU': '202.80.32.0/20',
4798 'WF': '117.20.32.0/21',
4799 'WS': '202.4.32.0/19',
4800 'YE': '134.35.0.0/16',
4801 'YT': '41.242.116.0/22',
4802 'ZA': '41.0.0.0/11',
4803 'ZM': '102.144.0.0/13',
4804 'ZW': '102.177.192.0/18',
4805 }
4806
4807 @classmethod
4808 def random_ipv4(cls, code_or_block):
4809 if len(code_or_block) == 2:
4810 block = cls._country_ip_map.get(code_or_block.upper())
4811 if not block:
4812 return None
4813 else:
4814 block = code_or_block
4815 addr, preflen = block.split('/')
4816 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4817 addr_max = addr_min | (0xffffffff >> int(preflen))
4818 return str(socket.inet_ntoa(
4819 struct.pack('!L', random.randint(addr_min, addr_max))))
4820
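# Illustrative examples (output is random within the block, so only the
# network prefix is predictable; the literal results below are one possibility):
#
#   >>> GeoUtils.random_ipv4('DE')  # some address within 53.0.0.0/8
#   '53.187.201.94'
#   >>> GeoUtils.random_ipv4('10.0.0.0/8')  # explicit CIDR blocks also work
#   '10.22.13.37'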
4821
4822 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4823 def __init__(self, proxies=None):
4824 # Set default handlers
4825 for type in ('http', 'https'):
4826 setattr(self, '%s_open' % type,
4827 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4828 meth(r, proxy, type))
4829 urllib.request.ProxyHandler.__init__(self, proxies)
4830
4831 def proxy_open(self, req, proxy, type):
4832 req_proxy = req.headers.get('Ytdl-request-proxy')
4833 if req_proxy is not None:
4834 proxy = req_proxy
4835 del req.headers['Ytdl-request-proxy']
4836
4837 if proxy == '__noproxy__':
4838 return None # No Proxy
4839 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4840 req.add_header('Ytdl-socks-proxy', proxy)
4841 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
4842 return None
4843 return urllib.request.ProxyHandler.proxy_open(
4844 self, req, proxy, type)
4845
4846
4847 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4848 # released into Public Domain
4849 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4850
4851 def long_to_bytes(n, blocksize=0):
4852 """long_to_bytes(n:long, blocksize:int) : string
4853 Convert a long integer to a byte string.
4854
4855 If optional blocksize is given and greater than zero, pad the front of the
4856 byte string with binary zeros so that the length is a multiple of
4857 blocksize.
4858 """
4859 # after much testing, this algorithm was deemed to be the fastest
4860 s = b''
4861 n = int(n)
4862 while n > 0:
4863 s = struct.pack('>I', n & 0xffffffff) + s
4864 n = n >> 32
4865 # strip off leading zeros
4866 for i in range(len(s)):
4867 if s[i] != b'\000'[0]:
4868 break
4869 else:
4870 # only happens when n == 0
4871 s = b'\000'
4872 i = 0
4873 s = s[i:]
4874 # add back some pad bytes. this could be done more efficiently w.r.t. the
4875 # de-padding being done above, but sigh...
4876 if blocksize > 0 and len(s) % blocksize:
4877 s = (blocksize - len(s) % blocksize) * b'\000' + s
4878 return s
4879
4880
4881 def bytes_to_long(s):
4882 """bytes_to_long(string) : long
4883 Convert a byte string to a long integer.
4884
4885 This is (essentially) the inverse of long_to_bytes().
4886 """
4887 acc = 0
4888 length = len(s)
4889 if length % 4:
4890 extra = (4 - length % 4)
4891 s = b'\000' * extra + s
4892 length = length + extra
4893 for i in range(0, length, 4):
4894 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4895 return acc
4896
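# Illustrative examples (the two functions are inverses of each other):
#
#   >>> long_to_bytes(65537)
#   b'\x01\x00\x01'
#   >>> long_to_bytes(65537, blocksize=4)
#   b'\x00\x01\x00\x01'
#   >>> bytes_to_long(b'\x01\x00\x01')
#   65537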
4897
4898 def ohdave_rsa_encrypt(data, exponent, modulus):
4899 '''
4900 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4901
4902 Input:
4903 data: data to encrypt, bytes-like object
4904 exponent, modulus: parameter e and N of RSA algorithm, both integer
4905 Output: hex string of encrypted data
4906
4907 Limitation: supports one block encryption only
4908 '''
4909
4910 payload = int(binascii.hexlify(data[::-1]), 16)
4911 encrypted = pow(payload, exponent, modulus)
4912 return '%x' % encrypted
4913
4914
4915 def pkcs1pad(data, length):
4916 """
4917 Padding input data with PKCS#1 scheme
4918
4919 @param {int[]} data input data
4920 @param {int} length target length
4921 @returns {int[]} padded data
4922 """
4923 if len(data) > length - 11:
4924 raise ValueError('Input data too long for PKCS#1 padding')
4925
4926 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
4927 return [0, 2] + pseudo_random + [0] + data
4928
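# Illustrative example. The pseudo-random filler makes the exact output
# nondeterministic, but the layout is always [0, 2, <filler>, 0, <data>]:
#
#   >>> padded = pkcs1pad([1, 2, 3], 16)
#   >>> len(padded), padded[:2], padded[-4:]
#   (16, [0, 2], [0, 1, 2, 3])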
4929
4930 def _base_n_table(n, table):
4931 if not table and not n:
4932 raise ValueError('Either table or n must be specified')
4933 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4934
4935 if n and n != len(table):
4936 raise ValueError(f'base {n} exceeds table length {len(table)}')
4937 return table
4938
4939
4940 def encode_base_n(num, n=None, table=None):
4941 """Convert given int to a base-n string"""
4942 table = _base_n_table(n, table)
4943 if not num:
4944 return table[0]
4945
4946 result, base = '', len(table)
4947 while num:
4948 result = table[num % base] + result
4949 num = num // base
4950 return result
4951
4952
4953 def decode_base_n(string, n=None, table=None):
4954 """Convert given base-n string to int"""
4955 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4956 result, base = 0, len(table)
4957 for char in string:
4958 result = result * base + table[char]
4959 return result
4960
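# Illustrative examples (the default table is 0-9a-zA-Z, so n=16 gives
# lowercase hex):
#
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> decode_base_n('ff', 16)
#   255
#   >>> encode_base_n(0, 2)
#   '0'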
4961
4962 def decode_base(value, digits):
4963 deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
4964 f'in a future version. Use {__name__}.decode_base_n instead')
4965 return decode_base_n(value, table=digits)
4966
4967
4968 def decode_packed_codes(code):
4969 mobj = re.search(PACKED_CODES_RE, code)
4970 obfuscated_code, base, count, symbols = mobj.groups()
4971 base = int(base)
4972 count = int(count)
4973 symbols = symbols.split('|')
4974 symbol_table = {}
4975
4976 while count:
4977 count -= 1
4978 base_n_count = encode_base_n(count, base)
4979 symbol_table[base_n_count] = symbols[count] or base_n_count
4980
4981 return re.sub(
4982 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4983 obfuscated_code)
4984
4985
4986 def caesar(s, alphabet, shift):
4987 if shift == 0:
4988 return s
4989 l = len(alphabet)
4990 return ''.join(
4991 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4992 for c in s)
4993
4994
4995 def rot47(s):
4996 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4997
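# Illustrative examples. rot47 is self-inverse, since shifting twice by 47
# wraps around the 94-character printable-ASCII alphabet:
#
#   >>> caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 1)
#   'bcd'
#   >>> rot47(rot47('secret'))
#   'secret'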
4998
4999 def parse_m3u8_attributes(attrib):
5000 info = {}
5001 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5002 if val.startswith('"'):
5003 val = val[1:-1]
5004 info[key] = val
5005 return info
5006
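# Illustrative example (sample EXT-X-STREAM-INF attribute list; quoted values
# may contain commas):
#
#   >>> parse_m3u8_attributes('BANDWIDTH=2149280,CODECS="mp4a.40.2,avc1.64001f",RESOLUTION=1280x720')
#   {'BANDWIDTH': '2149280', 'CODECS': 'mp4a.40.2,avc1.64001f', 'RESOLUTION': '1280x720'}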
5007
5008 def urshift(val, n):
5009 return val >> n if val >= 0 else (val + 0x100000000) >> n
5010
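# Emulates an unsigned (logical) right shift on 32-bit values, much like
# JavaScript's `>>>` operator:
#
#   >>> urshift(-16, 4)
#   268435455
#   >>> urshift(16, 4)
#   1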
5011
5012 # Based on png2str() written by @gdkchan and improved by @yokrysty
5013 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5014 def decode_png(png_data):
5015 # Reference: https://www.w3.org/TR/PNG/
5016 header = png_data[8:]
5017
5018 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5019 raise OSError('Not a valid PNG file.')
5020
5021 int_map = {1: '>B', 2: '>H', 4: '>I'}
5022 unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
5023
5024 chunks = []
5025
5026 while header:
5027 length = unpack_integer(header[:4])
5028 header = header[4:]
5029
5030 chunk_type = header[:4]
5031 header = header[4:]
5032
5033 chunk_data = header[:length]
5034 header = header[length:]
5035
5036 header = header[4:] # Skip CRC
5037
5038 chunks.append({
5039 'type': chunk_type,
5040 'length': length,
5041 'data': chunk_data
5042 })
5043
5044 ihdr = chunks[0]['data']
5045
5046 width = unpack_integer(ihdr[:4])
5047 height = unpack_integer(ihdr[4:8])
5048
5049 idat = b''
5050
5051 for chunk in chunks:
5052 if chunk['type'] == b'IDAT':
5053 idat += chunk['data']
5054
5055 if not idat:
5056 raise OSError('Unable to read PNG data.')
5057
5058 decompressed_data = bytearray(zlib.decompress(idat))
5059
5060 stride = width * 3
5061 pixels = []
5062
5063 def _get_pixel(idx):
5064 x = idx % stride
5065 y = idx // stride
5066 return pixels[y][x]
5067
5068 for y in range(height):
5069 basePos = y * (1 + stride)
5070 filter_type = decompressed_data[basePos]
5071
5072 current_row = []
5073
5074 pixels.append(current_row)
5075
5076 for x in range(stride):
5077 color = decompressed_data[1 + basePos + x]
5078 basex = y * stride + x
5079 left = 0
5080 up = 0
5081
5082 if x > 2:
5083 left = _get_pixel(basex - 3)
5084 if y > 0:
5085 up = _get_pixel(basex - stride)
5086
5087 if filter_type == 1: # Sub
5088 color = (color + left) & 0xff
5089 elif filter_type == 2: # Up
5090 color = (color + up) & 0xff
5091 elif filter_type == 3: # Average
5092 color = (color + ((left + up) >> 1)) & 0xff
5093 elif filter_type == 4: # Paeth
5094 a = left
5095 b = up
5096 c = 0
5097
5098 if x > 2 and y > 0:
5099 c = _get_pixel(basex - stride - 3)
5100
5101 p = a + b - c
5102
5103 pa = abs(p - a)
5104 pb = abs(p - b)
5105 pc = abs(p - c)
5106
5107 if pa <= pb and pa <= pc:
5108 color = (color + a) & 0xff
5109 elif pb <= pc:
5110 color = (color + b) & 0xff
5111 else:
5112 color = (color + c) & 0xff
5113
5114 current_row.append(color)
5115
5116 return width, height, pixels
5117
5118
5119 def write_xattr(path, key, value):
5120 # Windows: Write xattrs to NTFS Alternate Data Streams:
5121 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5122 if compat_os_name == 'nt':
5123 assert ':' not in key
5124 assert os.path.exists(path)
5125
5126 try:
5127 with open(f'{path}:{key}', 'wb') as f:
5128 f.write(value)
5129 except OSError as e:
5130 raise XAttrMetadataError(e.errno, e.strerror)
5131 return
5132
5133 # UNIX Method 1. Use xattrs/pyxattrs modules
5134
5135 setxattr = None
5136 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5137 # Unicode arguments are not supported in pyxattr until version 0.5.0
5138 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5139 if version_tuple(xattr.__version__) >= (0, 5, 0):
5140 setxattr = xattr.set
5141 elif xattr:
5142 setxattr = xattr.setxattr
5143
5144 if setxattr:
5145 try:
5146 setxattr(path, key, value)
5147 except OSError as e:
5148 raise XAttrMetadataError(e.errno, e.strerror)
5149 return
5150
5151 # UNIX Method 2. Use setfattr/xattr executables
5152 exe = ('setfattr' if check_executable('setfattr', ['--version'])
5153 else 'xattr' if check_executable('xattr', ['-h']) else None)
5154 if not exe:
5155 raise XAttrUnavailableError(
5156 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5157 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5158
5159 value = value.decode()
5160 try:
5161 _, stderr, returncode = Popen.run(
5162 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5163 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5164 except OSError as e:
5165 raise XAttrMetadataError(e.errno, e.strerror)
5166 if returncode:
5167 raise XAttrMetadataError(returncode, stderr)
5168
5169
5170 def random_birthday(year_field, month_field, day_field):
5171 start_date = datetime.date(1950, 1, 1)
5172 end_date = datetime.date(1995, 12, 31)
5173 offset = random.randint(0, (end_date - start_date).days)
5174 random_date = start_date + datetime.timedelta(offset)
5175 return {
5176 year_field: str(random_date.year),
5177 month_field: str(random_date.month),
5178 day_field: str(random_date.day),
5179 }
5180
5181
5182 # Templates for internet shortcut files, which are plain text files.
5183 DOT_URL_LINK_TEMPLATE = '''\
5184 [InternetShortcut]
5185 URL=%(url)s
5186 '''
5187
5188 DOT_WEBLOC_LINK_TEMPLATE = '''\
5189 <?xml version="1.0" encoding="UTF-8"?>
5190 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5191 <plist version="1.0">
5192 <dict>
5193 \t<key>URL</key>
5194 \t<string>%(url)s</string>
5195 </dict>
5196 </plist>
5197 '''
5198
5199 DOT_DESKTOP_LINK_TEMPLATE = '''\
5200 [Desktop Entry]
5201 Encoding=UTF-8
5202 Name=%(filename)s
5203 Type=Link
5204 URL=%(url)s
5205 Icon=text-html
5206 '''
5207
5208 LINK_TEMPLATES = {
5209 'url': DOT_URL_LINK_TEMPLATE,
5210 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5211 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5212 }
5213
5214
5215 def iri_to_uri(iri):
5216 """
5217 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5218
5219 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-encodes (using UTF-8) only those characters that are not already escaped, leaving existing percent-escapes intact.
5220 """
5221
5222 iri_parts = urllib.parse.urlparse(iri)
5223
5224 if '[' in iri_parts.netloc:
5225 raise ValueError('IPv6 URIs are not yet supported.')
5226 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5227
5228 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5229
5230 net_location = ''
5231 if iri_parts.username:
5232 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5233 if iri_parts.password is not None:
5234 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5235 net_location += '@'
5236
5237 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5238 # The 'idna' encoding produces ASCII text.
5239 if iri_parts.port is not None and iri_parts.port != 80:
5240 net_location += ':' + str(iri_parts.port)
5241
5242 return urllib.parse.urlunparse(
5243 (iri_parts.scheme,
5244 net_location,
5245
5246 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5247
5248 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5249 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5250
5251 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5252 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5253
5254 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5255
5256 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5257
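# Illustrative examples (sample IRIs; the second shows the IDNA/Punycode
# handling of non-ASCII hostnames):
#
#   >>> iri_to_uri('https://example.com/fünf?q=ä')
#   'https://example.com/f%C3%BCnf?q=%C3%A4'
#   >>> iri_to_uri('http://müller.example/')
#   'http://xn--mller-kva.example/'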
5258
5259 def to_high_limit_path(path):
5260 if sys.platform in ['win32', 'cygwin']:
5261 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5262 return '\\\\?\\' + os.path.abspath(path)
5263
5264 return path
5265
5266
5267 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5268 val = traverse_obj(obj, *variadic(field))
5269 if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5270 return default
5271 return template % func(val)
5272
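# Illustrative examples (sample info-dict fields):
#
#   >>> format_field({'width': 1920}, 'width', '%dpx')
#   '1920px'
#   >>> format_field({}, 'width', '%dpx', default='unknown')
#   'unknown'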
5273
5274 def clean_podcast_url(url):
5275 return re.sub(r'''(?x)
5276 (?:
5277 (?:
5278 chtbl\.com/track|
5279 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5280 play\.podtrac\.com
5281 )/[^/]+|
5282 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5283 flex\.acast\.com|
5284 pd(?:
5285 cn\.co| # https://podcorn.com/analytics-prefix/
5286 st\.fm # https://podsights.com/docs/
5287 )/e
5288 )/''', '', url)
5289
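# Illustrative example (hypothetical tracker-prefixed URL):
#
#   >>> clean_podcast_url('https://chtbl.com/track/12345/cdn.example.com/ep1.mp3')
#   'https://cdn.example.com/ep1.mp3'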
5290
5291 _HEX_TABLE = '0123456789abcdef'
5292
5293
5294 def random_uuidv4():
5295 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5296
5297
5298 def make_dir(path, to_screen=None):
5299 try:
5300 dn = os.path.dirname(path)
5301 if dn and not os.path.exists(dn):
5302 os.makedirs(dn)
5303 return True
5304 except OSError as err:
5305 if callable(to_screen):
5306 to_screen('unable to create directory ' + error_to_compat_str(err))
5307 return False
5308
5309
5310 def get_executable_path():
5311 from .update import _get_variant_and_executable_path
5312
5313 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5314
5315
5316 def load_plugins(name, suffix, namespace):
5317 classes = {}
5318 with contextlib.suppress(FileNotFoundError):
5319 plugins_spec = importlib.util.spec_from_file_location(
5320 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5321 plugins = importlib.util.module_from_spec(plugins_spec)
5322 sys.modules[plugins_spec.name] = plugins
5323 plugins_spec.loader.exec_module(plugins)
5324 for name in dir(plugins):
5325 if name in namespace:
5326 continue
5327 if not name.endswith(suffix):
5328 continue
5329 klass = getattr(plugins, name)
5330 classes[name] = namespace[name] = klass
5331 return classes
5332
5333
5334 def traverse_obj(
5335 obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
5336 casesense=True, is_user_input=False, traverse_string=False):
5337 """
5338 Safely traverse nested `dict`s and `Sequence`s
5339
5340 >>> obj = [{}, {"key": "value"}]
5341 >>> traverse_obj(obj, (1, "key"))
5342 "value"
5343
5344 Each of the provided `paths` is tested and the first producing a valid result will be returned.
5345 The next path will also be tested if the path branched but no results could be found.
5346 Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
5347 A value of None is treated as the absence of a value.
5348
5349 The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
5350
5351 The keys in the path can be one of:
5352 - `None`: Return the current object.
5353 - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
5354 - `slice`: Branch out and return all values in `obj[key]`.
5355 - `Ellipsis`: Branch out and return a list of all values.
5356 - `tuple`/`list`: Branch out and return a list of all matching values.
5357 Read as: `[traverse_obj(obj, branch) for branch in branches]`.
5358 - `function`: Branch out and return values filtered by the function.
5359 Read as: `[value for key, value in obj if function(key, value)]`.
5360 For `Sequence`s, `key` is the index of the value.
5361 - `dict` Transform the current object and return a matching dict.
5362 Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
5363
5364 `tuple`, `list`, and `dict` all support nested paths and branches.
5365
5366 @params paths Paths by which to traverse.
5367 @param default Value to return if the paths do not match.
5368 @param expected_type If a `type`, only accept final values of this type.
5369 If any other callable, try to call the function on each result.
5370 @param get_all If `False`, return the first matching result, otherwise all matching ones.
5371 @param casesense If `False`, consider string dictionary keys as case insensitive.
5372
5373 The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
5374
5375 @param is_user_input Whether the keys are generated from user input.
5376 If `True` strings get converted to `int`/`slice` if needed.
5377 @param traverse_string Whether to traverse into objects as strings.
5378 If `True`, any non-compatible object will first be
5379 converted into a string and then traversed into.
5380
5381
5382 @returns The result of the object traversal.
5383 If successful, `get_all=True`, and the path branches at least once,
5384 then a list of results is returned instead.
5385 A list is always returned if the last path branches and no `default` is given.
5386 """
5387 is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
5388 casefold = lambda k: k.casefold() if isinstance(k, str) else k
5389
5390 if isinstance(expected_type, type):
5391 type_test = lambda val: val if isinstance(val, expected_type) else None
5392 else:
5393 type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
5394
5395 def apply_key(key, obj):
5396 if obj is None:
5397 return
5398
5399 elif key is None:
5400 yield obj
5401
5402 elif isinstance(key, (list, tuple)):
5403 for branch in key:
5404 _, result = apply_path(obj, branch)
5405 yield from result
5406
5407 elif key is ...:
5408 if isinstance(obj, collections.abc.Mapping):
5409 yield from obj.values()
5410 elif is_sequence(obj):
5411 yield from obj
5412 elif isinstance(obj, re.Match):
5413 yield from obj.groups()
5414 elif traverse_string:
5415 yield from str(obj)
5416
5417 elif callable(key):
5418 if is_sequence(obj):
5419 iter_obj = enumerate(obj)
5420 elif isinstance(obj, collections.abc.Mapping):
5421 iter_obj = obj.items()
5422 elif isinstance(obj, re.Match):
5423 iter_obj = enumerate((obj.group(), *obj.groups()))
5424 elif traverse_string:
5425 iter_obj = enumerate(str(obj))
5426 else:
5427 return
5428 yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))
5429
5430 elif isinstance(key, dict):
5431 iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
5432 yield {k: v if v is not None else default for k, v in iter_obj
5433 if v is not None or default is not NO_DEFAULT}
5434
5435 elif isinstance(obj, collections.abc.Mapping):
5436 yield (obj.get(key) if casesense or (key in obj)
5437 else next((v for k, v in obj.items() if casefold(k) == key), None))
5438
5439 elif isinstance(obj, re.Match):
5440 if isinstance(key, int) or casesense:
5441 with contextlib.suppress(IndexError):
5442 yield obj.group(key)
5443 return
5444
5445 if not isinstance(key, str):
5446 return
5447
5448 yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
5449
5450 else:
5451 if is_user_input:
5452 key = (int_or_none(key) if ':' not in key
5453 else slice(*map(int_or_none, key.split(':'))))
5454
5455 if not isinstance(key, (int, slice)):
5456 return
5457
5458 if not is_sequence(obj):
5459 if not traverse_string:
5460 return
5461 obj = str(obj)
5462
5463 with contextlib.suppress(IndexError):
5464 yield obj[key]
5465
5466 def apply_path(start_obj, path):
5467 objs = (start_obj,)
5468 has_branched = False
5469
5470 for key in variadic(path):
5471 if is_user_input and key == ':':
5472 key = ...
5473
5474 if not casesense and isinstance(key, str):
5475 key = key.casefold()
5476
5477 if key is ... or isinstance(key, (list, tuple)) or callable(key):
5478 has_branched = True
5479
5480 key_func = functools.partial(apply_key, key)
5481 objs = itertools.chain.from_iterable(map(key_func, objs))
5482
5483 return has_branched, objs
5484
5485 def _traverse_obj(obj, path, use_list=True):
5486 has_branched, results = apply_path(obj, path)
5487 results = LazyList(x for x in map(type_test, results) if x is not None)
5488
5489 if get_all and has_branched:
5490 return results.exhaust() if results or use_list else None
5491
5492 return results[0] if results else None
5493
5494 for index, path in enumerate(paths, 1):
5495 use_list = default is NO_DEFAULT and index == len(paths)
5496 result = _traverse_obj(obj, path, use_list)
5497 if result is not None:
5498 return result
5499
5500 return None if default is NO_DEFAULT else default
5501
5502
5503 def traverse_dict(dictn, keys, casesense=True):
5504 deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
5505 f'in a future version. Use "{__name__}.traverse_obj" instead')
5506 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5507
5508
5509 def get_first(obj, keys, **kwargs):
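"""
Return the first non-None value of `keys` found in any entry of the collection `obj`
(a thin convenience wrapper over `traverse_obj`; editor's illustrative example):

>>> get_first([{'id': None}, {'id': 'abc'}], 'id')
'abc'
"""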
5510 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5511
5512
5513 def time_seconds(**kwargs):
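"""Returns TZ-aware current time, in seconds since the epoch; the timedelta kwargs (e.g. `hours=9`) construct the timezone offset"""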
5514 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5515 return t.timestamp()
5516
5517
5518 # Create a JSON Web Signature (JWS) with the HS256 algorithm;
5519 # the resulting format is JWS Compact Serialization.
5520 # Implemented following JWT: https://www.rfc-editor.org/rfc/rfc7519.html
5521 # Implemented following JWS: https://www.rfc-editor.org/rfc/rfc7515.html
5522 def jwt_encode_hs256(payload_data, key, headers=None):
5523 header_data = {
5524 'alg': 'HS256',
5525 'typ': 'JWT',
5526 **(headers or {}),
5527 }
5528 # RFC 7515 requires the unpadded base64url alphabet for all three segments
5529 header_b64 = base64.urlsafe_b64encode(json.dumps(header_data).encode()).rstrip(b'=')
5530 payload_b64 = base64.urlsafe_b64encode(json.dumps(payload_data).encode()).rstrip(b'=')
5531 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5532 signature_b64 = base64.urlsafe_b64encode(h.digest()).rstrip(b'=')
5533 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5534 return token
5535
5536
5537 # Can be extended in the future to verify the signature, parse the header, and return the algorithm used if it's not HS256
5538 def jwt_decode_hs256(jwt):
5539 header_b64, payload_b64, signature_b64 = jwt.split('.')
5540 # Add trailing '='s that may have been stripped; superfluous '='s are ignored
5541 payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
5542 return payload_data
5543
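# Illustrative round-trip of the two functions above (hypothetical values):
#   token = jwt_encode_hs256({'uid': 42}, 'shared-secret')
#   assert jwt_decode_hs256(token.decode()) == {'uid': 42}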
5544
5545 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5546
5547
5548 @functools.cache
5549 def supports_terminal_sequences(stream):
5550 if compat_os_name == 'nt':
5551 if not WINDOWS_VT_MODE:
5552 return False
5553 elif not os.getenv('TERM'):
5554 return False
5555 try:
5556 return stream.isatty()
5557 except BaseException:
5558 return False
5559
5560
5561 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5562 if get_windows_version() < (10, 0, 10586):
5563 return
5564 global WINDOWS_VT_MODE
5565 try:
5566 Popen.run('', shell=True)
5567 except Exception:
5568 return
5569
5570 WINDOWS_VT_MODE = True
5571 supports_terminal_sequences.cache_clear()
5572
5573
5574 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5575
5576
5577 def remove_terminal_sequences(string):
5578 return _terminal_sequences_re.sub('', string)
5579
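# e.g. remove_terminal_sequences('\033[0;31mred\033[0m') == 'red' (strips ANSI SGR sequences)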
5580
5581 def number_of_digits(number):
5582 return len('%d' % number)
5583
5584
5585 def join_nonempty(*values, delim='-', from_dict=None):
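"""
Join truthy values with `delim`, dropping falsy ones (None, '', 0, ...).
If `from_dict` is given, each value is first resolved via `traverse_obj(from_dict, variadic(value))`.
Editor's illustrative example:

>>> join_nonempty('a', None, '', 'b')
'a-b'
"""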
5586 if from_dict is not None:
5587 values = (traverse_obj(from_dict, variadic(v)) for v in values)
5588 return delim.join(map(str, filter(None, values)))
5589
5590
5591 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5592 """
5593 Find the largest format dimensions in terms of video width and, for each thumbnail:
5594 * Modify the URL: replace the width matched by the provided regex with the largest format width
5595 * Update dimensions
5596
5597 This function is useful with video services that scale the provided thumbnails on demand
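
For illustration (hypothetical values): with a largest format width of 1280 and
url_width_re=r'(?<=/)\d+(?=/)', the thumbnail URL 'https://example.invalid/320/thumb.jpg'
would be rewritten to 'https://example.invalid/1280/thumb.jpg'.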
5598 """
5599 _keys = ('width', 'height')
5600 max_dimensions = max(
5601 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5602 default=(0, 0))
5603 if not max_dimensions[0]:
5604 return thumbnails
5605 return [
5606 merge_dicts(
5607 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5608 dict(zip(_keys, max_dimensions)), thumbnail)
5609 for thumbnail in thumbnails
5610 ]
5611
5612
5613 def parse_http_range(range):
5614 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5615 if not range:
5616 return None, None, None
5617 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5618 if not crg:
5619 return None, None, None
5620 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5621
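# Illustrative example: parse_http_range('bytes 0-499/1234') == (0, 499, 1234)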
5622
5623 def read_stdin(what):
5624 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5625 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5626 return sys.stdin
5627
5628
5629 def determine_file_encoding(data):
5630 """
5631 Detect the text encoding used
5632 @returns (encoding, bytes to skip)
5633 """
5634
5635 # BOM marks are given priority over declarations
5636 for bom, enc in BOMS:
5637 if data.startswith(bom):
5638 return enc, len(bom)
5639
5640 # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5641 # We ignore the endianness to get a good enough match
5642 data = data.replace(b'\0', b'')
5643 mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5644 return mobj.group(1).decode() if mobj else None, 0
5645
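# Illustrative example: a PEP 263-style coding declaration (and no BOM) is
# detected with no bytes skipped:
#   determine_file_encoding(b'# coding: utf-8\n--flag') == ('utf-8', 0)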
5646
5647 class Config:
5648 own_args = None
5649 parsed_args = None
5650 filename = None
5651 __initialized = False
5652
5653 def __init__(self, parser, label=None):
5654 self.parser, self.label = parser, label
5655 self._loaded_paths, self.configs = set(), []
5656
5657 def init(self, args=None, filename=None):
5658 assert not self.__initialized
5659 self.own_args, self.filename = args, filename
5660 return self.load_configs()
5661
5662 def load_configs(self):
5663 directory = ''
5664 if self.filename:
5665 location = os.path.realpath(self.filename)
5666 directory = os.path.dirname(location)
5667 if location in self._loaded_paths:
5668 return False
5669 self._loaded_paths.add(location)
5670
5671 self.__initialized = True
5672 opts, _ = self.parser.parse_known_args(self.own_args)
5673 self.parsed_args = self.own_args
5674 for location in opts.config_locations or []:
5675 if location == '-':
5676 if location in self._loaded_paths:
5677 continue
5678 self._loaded_paths.add(location)
5679 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5680 continue
5681 location = os.path.join(directory, expand_path(location))
5682 if os.path.isdir(location):
5683 location = os.path.join(location, 'yt-dlp.conf')
5684 if not os.path.exists(location):
5685 self.parser.error(f'config location {location} does not exist')
5686 self.append_config(self.read_file(location), location)
5687 return True
5688
5689 def __str__(self):
5690 label = join_nonempty(
5691 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5692 delim=' ')
5693 return join_nonempty(
5694 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5695 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5696 delim='\n')
5697
5698 @staticmethod
5699 def read_file(filename, default=[]):
5700 try:
5701 optionf = open(filename, 'rb')
5702 except OSError:
5703 return default # silently skip if file is not present
5704 try:
5705 enc, skip = determine_file_encoding(optionf.read(512))
5706 optionf.seek(skip, io.SEEK_SET)
5707 except OSError:
5708 enc = None # silently skip read errors
5709 try:
5710 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5711 contents = optionf.read().decode(enc or preferredencoding())
5712 res = shlex.split(contents, comments=True)
5713 except Exception as err:
5714 raise ValueError(f'Unable to parse "{filename}": {err}')
5715 finally:
5716 optionf.close()
5717 return res
5718
5719 @staticmethod
5720 def hide_login_info(opts):
5721 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5722 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5723
5724 def _scrub_eq(o):
5725 m = eqre.match(o)
5726 if m:
5727 return m.group('key') + '=PRIVATE'
5728 else:
5729 return o
5730
5731 opts = list(map(_scrub_eq, opts))
5732 for idx, opt in enumerate(opts):
5733 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5734 opts[idx + 1] = 'PRIVATE'
5735 return opts
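# Illustrative example:
#   Config.hide_login_info(['-u', 'name', '--password=secret'])
#   == ['-u', 'PRIVATE', '--password=PRIVATE']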
5736
5737 def append_config(self, *args, label=None):
5738 config = type(self)(self.parser, label)
5739 config._loaded_paths = self._loaded_paths
5740 if config.init(*args):
5741 self.configs.append(config)
5742
5743 @property
5744 def all_args(self):
5745 for config in reversed(self.configs):
5746 yield from config.all_args
5747 yield from self.parsed_args or []
5748
5749 def parse_known_args(self, **kwargs):
5750 return self.parser.parse_known_args(self.all_args, **kwargs)
5751
5752 def parse_args(self):
5753 return self.parser.parse_args(self.all_args)
5754
5755
5756 class WebSocketsWrapper:
5757 """Wraps websockets module to use in non-async scopes"""
5758 pool = None
5759
5760 def __init__(self, url, headers=None, connect=True):
5761 self.loop = asyncio.new_event_loop()
5762 # XXX: "loop" is deprecated
5763 self.conn = websockets.connect(
5764 url, extra_headers=headers, ping_interval=None,
5765 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5766 if connect:
5767 self.__enter__()
5768 atexit.register(self.__exit__, None, None, None)
5769
5770 def __enter__(self):
5771 if not self.pool:
5772 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5773 return self
5774
5775 def send(self, *args):
5776 self.run_with_loop(self.pool.send(*args), self.loop)
5777
5778 def recv(self, *args):
5779 return self.run_with_loop(self.pool.recv(*args), self.loop)
5780
5781 def __exit__(self, type, value, traceback):
5782 try:
5783 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5784 finally:
5785 self._cancel_all_tasks(self.loop)  # cancel pending tasks before closing the loop
5786 self.loop.close()
5787
5788 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5789 # For contributors: if any new library that uses asyncio needs to run in a non-async scope, move these functions out of this class
5790 @staticmethod
5791 def run_with_loop(main, loop):
5792 if not asyncio.iscoroutine(main):
5793 raise ValueError(f'a coroutine was expected, got {main!r}')
5794
5795 try:
5796 return loop.run_until_complete(main)
5797 finally:
5798 loop.run_until_complete(loop.shutdown_asyncgens())
5799 if hasattr(loop, 'shutdown_default_executor'):
5800 loop.run_until_complete(loop.shutdown_default_executor())
5801
5802 @staticmethod
5803 def _cancel_all_tasks(loop):
5804 to_cancel = asyncio.all_tasks(loop)
5805
5806 if not to_cancel:
5807 return
5808
5809 for task in to_cancel:
5810 task.cancel()
5811
5812 # XXX: "loop" is removed in python 3.10+
5813 loop.run_until_complete(
5814 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5815
5816 for task in to_cancel:
5817 if task.cancelled():
5818 continue
5819 if task.exception() is not None:
5820 loop.call_exception_handler({
5821 'message': 'unhandled exception during asyncio.run() shutdown',
5822 'exception': task.exception(),
5823 'task': task,
5824 })
5825
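# Illustrative usage sketch (URL and payload are hypothetical):
#   ws = WebSocketsWrapper('wss://example.invalid/socket', headers={'Origin': 'https://example.invalid'})
#   ws.send('{"op": "subscribe"}')
#   reply = ws.recv()
#   ws.__exit__(None, None, None)  # closes the connection and the event loop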
5826
5827 def merge_headers(*dicts):
5828 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5829 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5830
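# Illustrative example (hypothetical values); the latter dict wins, case-insensitively:
#   merge_headers({'accept': '*/*'}, {'Accept': 'text/html'}) == {'Accept': 'text/html'}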
5831
5832 def cached_method(f):
5833 """Cache a method"""
5834 signature = inspect.signature(f)
5835
5836 @functools.wraps(f)
5837 def wrapper(self, *args, **kwargs):
5838 bound_args = signature.bind(self, *args, **kwargs)
5839 bound_args.apply_defaults()
5840 key = tuple(bound_args.arguments.values())[1:]
5841
5842 cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
5843 if key not in cache:
5844 cache[key] = f(self, *args, **kwargs)
5845 return cache[key]
5846 return wrapper
5847
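# Illustrative usage sketch (class and method are hypothetical):
#   class Client:
#       @cached_method
#       def fetch(self, url):
#           return expensive_request(url)  # computed once per distinct `url` per instance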
5848
5849 class classproperty:
5850 """property access for class methods with optional caching"""
5851 def __new__(cls, func=None, *args, **kwargs):
5852 if not func:
5853 return functools.partial(cls, *args, **kwargs)
5854 return super().__new__(cls)
5855
5856 def __init__(self, func, *, cache=False):
5857 functools.update_wrapper(self, func)
5858 self.func = func
5859 self._cache = {} if cache else None
5860
5861 def __get__(self, _, cls):
5862 if self._cache is None:
5863 return self.func(cls)
5864 elif cls not in self._cache:
5865 self._cache[cls] = self.func(cls)
5866 return self._cache[cls]
5867
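# Illustrative usage sketch (members are hypothetical):
#   class MyIE:
#       @classproperty(cache=True)
#       def _VALID_URL_RE(cls):
#           return re.compile(cls._VALID_URL)  # evaluated at most once per class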
5868
5869 class Namespace(types.SimpleNamespace):
5870 """Immutable namespace"""
5871
5872 def __iter__(self):
5873 return iter(self.__dict__.values())
5874
5875 @property
5876 def items_(self):
5877 return self.__dict__.items()
5878
5879
5880 MEDIA_EXTENSIONS = Namespace(
5881 common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5882 video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5883 common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5884 audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
5885 thumbnails=('jpg', 'png', 'webp'),
5886 storyboards=('mhtml', ),
5887 subtitles=('srt', 'vtt', 'ass', 'lrc'),
5888 manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5889 )
5890 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5891 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5892
5893 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5894
5895
5896 class RetryManager:
5897 """Usage:
5898 for retry in RetryManager(...):
5899 try:
5900 ...
5901 except SomeException as err:
5902 retry.error = err
5903 continue
5904 """
5905 attempt, _error = 0, None
5906
5907 def __init__(self, _retries, _error_callback, **kwargs):
5908 self.retries = _retries or 0
5909 self.error_callback = functools.partial(_error_callback, **kwargs)
5910
5911 def _should_retry(self):
5912 return self._error is not NO_DEFAULT and self.attempt <= self.retries
5913
5914 @property
5915 def error(self):
5916 if self._error is NO_DEFAULT:
5917 return None
5918 return self._error
5919
5920 @error.setter
5921 def error(self, value):
5922 self._error = value
5923
5924 def __iter__(self):
5925 while self._should_retry():
5926 self.error = NO_DEFAULT
5927 self.attempt += 1
5928 yield self
5929 if self.error:
5930 self.error_callback(self.error, self.attempt, self.retries)
5931
5932 @staticmethod
5933 def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5934 """Utility function for reporting retries"""
5935 if count > retries:
5936 if error:
5937 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5938 raise e
5939
5940 if not count:
5941 return warn(e)
5942 elif isinstance(e, ExtractorError):
5943 e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
5944 warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5945
5946 delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5947 if delay:
5948 info(f'Sleeping {delay:.2f} seconds ...')
5949 time.sleep(delay)
5950
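# Illustrative wiring sketch (print stands in for YoutubeDL's logging callbacks):
#   for retry in RetryManager(3, RetryManager.report_retry,
#                             sleep_func=1, info=print, warn=print):
#       try:
#           download_fragment()  # hypothetical flaky operation
#       except OSError as err:
#           retry.error = err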
5951
5952 def make_archive_id(ie, video_id):
5953 ie_key = ie if isinstance(ie, str) else ie.ie_key()
5954 return f'{ie_key.lower()} {video_id}'
5955
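# e.g. make_archive_id('Youtube', 'abc123') == 'youtube abc123' (illustrative values)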
5956
5957 def truncate_string(s, left, right=0):
5958 assert left > 3 and right >= 0
5959 if s is None or len(s) <= left + right:
5960 return s
5961 return f'{s[:left - 3]}...{s[-right:] if right else ""}'
5962
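# Illustrative examples: truncate_string('abcdefghij', 7, 2) == 'abcd...ij',
# and with right=0, truncate_string('abcdefghij', 7) == 'abcd...'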
5963
5964 def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
5965 assert 'all' in alias_dict, '"all" alias is required'
5966 requested = list(start or [])
5967 for val in options:
5968 discard = val.startswith('-')
5969 if discard:
5970 val = val[1:]
5971
5972 if val in alias_dict:
5973 val = alias_dict[val] if not discard else [
5974 i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
5975 # NB: Do not allow regex in aliases for performance
5976 requested = orderedSet_from_options(val, alias_dict, start=requested)
5977 continue
5978
5979 current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
5980 else [val] if val in alias_dict['all'] else None)
5981 if current is None:
5982 raise ValueError(val)
5983
5984 if discard:
5985 for item in current:
5986 while item in requested:
5987 requested.remove(item)
5988 else:
5989 requested.extend(current)
5990
5991 return orderedSet(requested)
5992
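# Illustrative example (hypothetical aliases): discarding after expanding 'all':
#   orderedSet_from_options(['all', '-fmt1'], {'all': ['fmt1', 'fmt2']}) == ['fmt2']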
5993
5994 # Deprecated
5995 has_certifi = bool(certifi)
5996 has_websockets = bool(websockets)