yt_dlp/utils.py

   1 import asyncio
   2 import atexit
   3 import base64
   4 import binascii
   5 import calendar
   6 import codecs
   7 import collections
   8 import collections.abc
   9 import contextlib
  10 import datetime
  11 import email.header
  12 import email.utils
  13 import errno
  14 import gzip
  15 import hashlib
  16 import hmac
  17 import html.entities
  18 import html.parser
  19 import http.client
  20 import http.cookiejar
  21 import importlib.util
  22 import inspect
  23 import io
  24 import itertools
  25 import json
  26 import locale
  27 import math
  28 import mimetypes
  29 import operator
  30 import os
  31 import platform
  32 import random
  33 import re
  34 import shlex
  35 import socket
  36 import ssl
  37 import struct
  38 import subprocess
  39 import sys
  40 import tempfile
  41 import time
  42 import traceback
  43 import types
  44 import unicodedata
  45 import urllib.error
  46 import urllib.parse
  47 import urllib.request
  48 import xml.etree.ElementTree
  49 import zlib
  50
  51 from .compat import functools  # isort: split
  52 from .compat import (
  53     compat_etree_fromstring,
  54     compat_expanduser,
  55     compat_HTMLParseError,
  56     compat_os_name,
  57     compat_shlex_quote,
  58 )
  59 from .dependencies import brotli, certifi, websockets, xattr
  60 from .socks import ProxyType, sockssocket
  61
  62
  63 def register_socks_protocols():
  64     # "Register" SOCKS protocols
  65     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  66     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  67     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  68         if scheme not in urllib.parse.uses_netloc:
  69             urllib.parse.uses_netloc.append(scheme)
  70
  71
# The type of compiled regular expression objects, obtained by compiling an
# empty pattern. This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
  74
  75
  76 def random_user_agent():
  77     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  78     _CHROME_VERSIONS = (
  79         '90.0.4430.212',
  80         '90.0.4430.24',
  81         '90.0.4430.70',
  82         '90.0.4430.72',
  83         '90.0.4430.85',
  84         '90.0.4430.93',
  85         '91.0.4472.101',
  86         '91.0.4472.106',
  87         '91.0.4472.114',
  88         '91.0.4472.124',
  89         '91.0.4472.164',
  90         '91.0.4472.19',
  91         '91.0.4472.77',
  92         '92.0.4515.107',
  93         '92.0.4515.115',
  94         '92.0.4515.131',
  95         '92.0.4515.159',
  96         '92.0.4515.43',
  97         '93.0.4556.0',
  98         '93.0.4577.15',
  99         '93.0.4577.63',
 100         '93.0.4577.82',
 101         '94.0.4606.41',
 102         '94.0.4606.54',
 103         '94.0.4606.61',
 104         '94.0.4606.71',
 105         '94.0.4606.81',
 106         '94.0.4606.85',
 107         '95.0.4638.17',
 108         '95.0.4638.50',
 109         '95.0.4638.54',
 110         '95.0.4638.69',
 111         '95.0.4638.74',
 112         '96.0.4664.18',
 113         '96.0.4664.45',
 114         '96.0.4664.55',
 115         '96.0.4664.93',
 116         '97.0.4692.20',
 117     )
 118     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 119
 120
# Names of content encodings; 'br' is only listed when the optional
# `brotli` dependency is available (truthy)
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

# Standard HTTP headers (presumably the defaults sent with requests - confirm
# against the callers). The User-Agent is chosen once, when this module is imported
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel for "no default given", so that None can be passed as an actual default
NO_DEFAULT = object()
# Function that returns its argument unchanged
IDENTITY = lambda x: x
 142
# Full English month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names in calendar order, keyed by language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# Timezone abbreviations mapped to their UTC offset in whole hours
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. The two sequences are
# paired positionally by zip(); some replacements have two letters (e.g. 'AE', 'ss')
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 173
# Date/time layouts written with %-directives (presumably tried one by one
# with strptime by the date parsing helpers - confirm against the callers).
# None of these is ambiguous about whether the day or the month comes first
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# The common formats plus the numeric ones that put the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

# The common formats plus the numeric ones that put the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
 238
# Matches `}('<payload>',<number>,<number>,'<words>'.split('|')` and captures
# the four arguments (looks like the tail of packer-obfuscated JavaScript - confirm)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive, dot
# matches newlines); the `json_ld` group holds the embedded object or array
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

# Matches an unsigned integer or decimal number, e.g. `12` or `12.5`
NUMBER_RE = r'\d+(?:\.\d+)?'
 243
 244
 245 @functools.cache
 246 def preferredencoding():
 247     """Get preferred encoding.
 248
 249     Returns the best encoding scheme for the system, based on
 250     locale.getpreferredencoding() and some further tweaks.
 251     """
 252     try:
 253         pref = locale.getpreferredencoding()
 254         'TEST'.encode(pref)
 255     except Exception:
 256         pref = 'UTF-8'
 257
 258     return pref
 259
 260
 261 def write_json_file(obj, fn):
 262     """ Encode obj as JSON and write it to fn, atomically if possible """
 263
 264     tf = tempfile.NamedTemporaryFile(
 265         prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 266         suffix='.tmp', delete=False, mode='w', encoding='utf-8')
 267
 268     try:
 269         with tf:
 270             json.dump(obj, tf, ensure_ascii=False)
 271         if sys.platform == 'win32':
 272             # Need to remove existing file on Windows, else os.rename raises
 273             # WindowsError or FileExistsError.
 274             with contextlib.suppress(OSError):
 275                 os.unlink(fn)
 276         with contextlib.suppress(OSError):
 277             mask = os.umask(0)
 278             os.umask(mask)
 279             os.chmod(tf.name, 0o666 & ~mask)
 280         os.rename(tf.name, fn)
 281     except Exception:
 282         with contextlib.suppress(OSError):
 283             os.remove(tf.name)
 284         raise
 285
 286
 287 def find_xpath_attr(node, xpath, key, val=None):
 288     """ Find the xpath xpath[@key=val] """
 289     assert re.match(r'^[a-zA-Z_-]+$', key)
 290     expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
 291     return node.find(expr)
 292
 293 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 294 # the namespace parameter
 295
 296
 297 def xpath_with_ns(path, ns_map):
 298     components = [c.split(':') for c in path.split('/')]
 299     replaced = []
 300     for c in components:
 301         if len(c) == 1:
 302             replaced.append(c[0])
 303         else:
 304             ns, tag = c
 305             replaced.append('{%s}%s' % (ns_map[ns], tag))
 306     return '/'.join(replaced)
 307
 308
 309 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 310     def _find_xpath(xpath):
 311         return node.find(xpath)
 312
 313     if isinstance(xpath, str):
 314         n = _find_xpath(xpath)
 315     else:
 316         for xp in xpath:
 317             n = _find_xpath(xp)
 318             if n is not None:
 319                 break
 320
 321     if n is None:
 322         if default is not NO_DEFAULT:
 323             return default
 324         elif fatal:
 325             name = xpath if name is None else name
 326             raise ExtractorError('Could not find XML element %s' % name)
 327         else:
 328             return None
 329     return n
 330
 331
 332 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 333     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 334     if n is None or n == default:
 335         return n
 336     if n.text is None:
 337         if default is not NO_DEFAULT:
 338             return default
 339         elif fatal:
 340             name = xpath if name is None else name
 341             raise ExtractorError('Could not find XML element\'s text %s' % name)
 342         else:
 343             return None
 344     return n.text
 345
 346
 347 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 348     n = find_xpath_attr(node, xpath, key)
 349     if n is None:
 350         if default is not NO_DEFAULT:
 351             return default
 352         elif fatal:
 353             name = f'{xpath}[@{key}]' if name is None else name
 354             raise ExtractorError('Could not find XML attribute %s' % name)
 355         else:
 356             return None
 357     return n.attrib[key]
 358
 359
 360 def get_element_by_id(id, html, **kwargs):
 361     """Return the content of the tag with the specified ID in the passed HTML document"""
 362     return get_element_by_attribute('id', id, html, **kwargs)
 363
 364
 365 def get_element_html_by_id(id, html, **kwargs):
 366     """Return the html of the tag with the specified ID in the passed HTML document"""
 367     return get_element_html_by_attribute('id', id, html, **kwargs)
 368
 369
 370 def get_element_by_class(class_name, html):
 371     """Return the content of the first tag with the specified class in the passed HTML document"""
 372     retval = get_elements_by_class(class_name, html)
 373     return retval[0] if retval else None
 374
 375
 376 def get_element_html_by_class(class_name, html):
 377     """Return the html of the first tag with the specified class in the passed HTML document"""
 378     retval = get_elements_html_by_class(class_name, html)
 379     return retval[0] if retval else None
 380
 381
 382 def get_element_by_attribute(attribute, value, html, **kwargs):
 383     retval = get_elements_by_attribute(attribute, value, html, **kwargs)
 384     return retval[0] if retval else None
 385
 386
 387 def get_element_html_by_attribute(attribute, value, html, **kargs):
 388     retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
 389     return retval[0] if retval else None
 390
 391
 392 def get_elements_by_class(class_name, html, **kargs):
 393     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 394     return get_elements_by_attribute(
 395         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 396         html, escape_value=False)
 397
 398
 399 def get_elements_html_by_class(class_name, html):
 400     """Return the html of all tags with the specified class in the passed HTML document as a list"""
 401     return get_elements_html_by_attribute(
 402         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 403         html, escape_value=False)
 404
 405
 406 def get_elements_by_attribute(*args, **kwargs):
 407     """Return the content of the tag with the specified attribute in the passed HTML document"""
 408     return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 409
 410
 411 def get_elements_html_by_attribute(*args, **kwargs):
 412     """Return the html of the tag with the specified attribute in the passed HTML document"""
 413     return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 414
 415
def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document

    This is a generator: it yields one (content, whole) pair for every
    matching element, in document order.

    @param attribute     Name of the attribute (it is regex-escaped)
    @param value         Value of the attribute. Nothing is yielded if it is empty
    @param html          The HTML document to search in
    @param tag           Regex which the name of the tag has to match
    @param escape_value  If False, value is used as a regex instead of a literal string
    """
    if not value:
        return

    # The quotes around the value are optional ('?') unless the value starts
    # with whitespace or one of "'`=<>
    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    # Matches the opening tag up to and including the wanted attribute.
    # (?-x:...) switches verbose mode off so that whitespace in value stays significant
    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        # The slice begins at the matched opening tag, so that is the first tag found
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            # Drop one pair of quotes enclosing the whole content, then decode HTML entities
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )
 441
 442
 443 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
 444     """
 445     HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
 446     closing tag for the first opening tag it has encountered, and can be used
 447     as a context manager
 448     """
 449
 450     class HTMLBreakOnClosingTagException(Exception):
 451         pass
 452
 453     def __init__(self):
 454         self.tagstack = collections.deque()
 455         html.parser.HTMLParser.__init__(self)
 456
 457     def __enter__(self):
 458         return self
 459
 460     def __exit__(self, *_):
 461         self.close()
 462
 463     def close(self):
 464         # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
 465         # so data remains buffered; we no longer have any interest in it, thus
 466         # override this method to discard it
 467         pass
 468
 469     def handle_starttag(self, tag, _):
 470         self.tagstack.append(tag)
 471
 472     def handle_endtag(self, tag):
 473         if not self.tagstack:
 474             raise compat_HTMLParseError('no tags in the stack')
 475         while self.tagstack:
 476             inner_tag = self.tagstack.pop()
 477             if inner_tag == tag:
 478                 break
 479         else:
 480             raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
 481         if not self.tagstack:
 482             raise self.HTMLBreakOnClosingTagException()
 483
 484
# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)

    Raises compat_HTMLParseError if the opening tag cannot be found or is
    malformed, or if no closing tag that balances it is found.
    """
    def find_or_raise(haystack, needle, exc):
        # Like str.index, but raises the given exception if needle is missing
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    # Make the index absolute and move it past the '>' of the opening tag
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        # Feed the opening tag on its own, so that it ends up at the bottom of the stack
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            # Both of these are relative to offset
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                # Feed everything up to and including the next candidate closing tag.
                # If it only closes a nested element of the same name, continue after it
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                # The stack got empty: this closing tag ends the element
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')
 519
 520
 521 class HTMLAttributeParser(html.parser.HTMLParser):
 522     """Trivial HTML parser to gather the attributes for a single element"""
 523
 524     def __init__(self):
 525         self.attrs = {}
 526         html.parser.HTMLParser.__init__(self)
 527
 528     def handle_starttag(self, tag, attrs):
 529         self.attrs = dict(attrs)
 530         raise compat_HTMLParseError('done')
 531
 532
 533 class HTMLListAttrsParser(html.parser.HTMLParser):
 534     """HTML parser to gather the attributes for the elements of a list"""
 535
 536     def __init__(self):
 537         html.parser.HTMLParser.__init__(self)
 538         self.items = []
 539         self._level = 0
 540
 541     def handle_starttag(self, tag, attrs):
 542         if tag == 'li' and self._level == 0:
 543             self.items.append(dict(attrs))
 544         self._level += 1
 545
 546     def handle_endtag(self, tag):
 547         self._level -= 1
 548
 549
 550 def extract_attributes(html_element):
 551     """Given a string for an HTML element such as
 552     <el
 553          a="foo" B="bar" c="&98;az" d=boz
 554          empty= noval entity="&amp;"
 555          sq='"' dq="'"
 556     >
 557     Decode and return a dictionary of attributes.
 558     {
 559         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 560         'empty': '', 'noval': None, 'entity': '&',
 561         'sq': '"', 'dq': '\''
 562     }.
 563     """
 564     parser = HTMLAttributeParser()
 565     with contextlib.suppress(compat_HTMLParseError):
 566         parser.feed(html_element)
 567         parser.close()
 568     return parser.attrs
 569
 570
 571 def parse_list(webpage):
 572     """Given a string for an series of HTML <li> elements,
 573     return a dictionary of their attributes"""
 574     parser = HTMLListAttrsParser()
 575     parser.feed(webpage)
 576     parser.close()
 577     return parser.items
 578
 579
 580 def clean_html(html):
 581     """Clean an HTML snippet into a readable string"""
 582
 583     if html is None:  # Convenience for sanitizing descriptions etc.
 584         return html
 585
 586     html = re.sub(r'\s+', ' ', html)
 587     html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
 588     html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
 589     # Strip html tags
 590     html = re.sub('<.*?>', '', html)
 591     # Replace html entities
 592     html = unescapeHTML(html)
 593     return html.strip()
 594
 595
 596 class LenientJSONDecoder(json.JSONDecoder):
 597     def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
 598         self.transform_source, self.ignore_extra = transform_source, ignore_extra
 599         super().__init__(*args, **kwargs)
 600
 601     def decode(self, s):
 602         if self.transform_source:
 603             s = self.transform_source(s)
 604         try:
 605             if self.ignore_extra:
 606                 return self.raw_decode(s.lstrip())[0]
 607             return super().decode(s)
 608         except json.JSONDecodeError as e:
 609             if e.pos is not None:
 610                 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
 611             raise
 612
 613
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename '-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
            with contextlib.suppress(io.UnsupportedOperation):
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        # Prefer the underlying binary stream if stdout has one
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    # At most two attempts: the name as given, then the sanitized name
    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    # NOTE(review): presumably LockingUnsupportedError is an OSError, so that
                    # the handler below falls back to a plain open() - confirm
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                # The file could not be opened with a lock; open it without one
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            # Give up on the second attempt, or if permission was denied
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            # Retrying is pointless if sanitizing did not change the name
            if old_filename == filename:
                raise
 650                 raise
 651
 652
 653 def timeconvert(timestr):
 654     """Convert RFC 2822 defined time string into system timestamp"""
 655     timestamp = None
 656     timetuple = email.utils.parsedate_tz(timestr)
 657     if timetuple is not None:
 658         timestamp = email.utils.mktime_tz(timetuple)
 659     return timestamp
 660
 661
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    @returns            The sanitized string. It is only empty if s is empty;
                        otherwise '_' is returned when nothing else is left
    """
    if s == '':
        return ''

    def replace_insane(char):
        # Returns the replacement for a single character. Substitute characters
        # are prefixed with '\0' so that they can be told apart from characters
        # of the input: the regexes further down collapse and strip them, and
        # the markers themselves are removed at the very end
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            # '?' and ASCII control characters are dropped
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    # Drop the markers; fall back to '_' if nothing is left
    result = result.replace('\0', '') or '_'

    # NO_DEFAULT is a truthy object, so this cleanup only runs if is_id was
    # explicitly passed as a false value
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
 715
 716
 717 def sanitize_path(s, force=False):
 718     """Sanitizes and normalizes path on Windows"""
 719     if sys.platform == 'win32':
 720         force = False
 721         drive_or_unc, _ = os.path.splitdrive(s)
 722     elif force:
 723         drive_or_unc = ''
 724     else:
 725         return s
 726
 727     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 728     if drive_or_unc:
 729         norm_path.pop(0)
 730     sanitized_path = [
 731         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 732         for path_part in norm_path]
 733     if drive_or_unc:
 734         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 735     elif force and s and s[0] == os.path.sep:
 736         sanitized_path.insert(0, os.path.sep)
 737     return os.path.join(*sanitized_path)
 738
 739
 740 def sanitize_url(url, *, scheme='http'):
 741     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 742     # the number of unwanted failures due to missing protocol
 743     if url is None:
 744         return
 745     elif url.startswith('//'):
 746         return f'{scheme}:{url}'
 747     # Fix some common typos seen so far
 748     COMMON_TYPOS = (
 749         # https://github.com/ytdl-org/youtube-dl/issues/15649
 750         (r'^httpss://', r'https://'),
 751         # https://bx1.be/lives/direct-tv/
 752         (r'^rmtp([es]?)://', r'rtmp\1://'),
 753     )
 754     for mistake, fixup in COMMON_TYPOS:
 755         if re.match(mistake, url):
 756             return re.sub(mistake, fixup, url)
 757     return url
 758
 759
 760 def extract_basic_auth(url):
 761     parts = urllib.parse.urlsplit(url)
 762     if parts.username is None:
 763         return url, None
 764     url = urllib.parse.urlunsplit(parts._replace(netloc=(
 765         parts.hostname if parts.port is None
 766         else '%s:%d' % (parts.hostname, parts.port))))
 767     auth_payload = base64.b64encode(
 768         ('%s:%s' % (parts.username, parts.password or '')).encode())
 769     return url, f'Basic {auth_payload.decode()}'
 770
 771
 772 def sanitized_Request(url, *args, **kwargs):
 773     url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
 774     if auth_header is not None:
 775         headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
 776         headers['Authorization'] = auth_header
 777     return urllib.request.Request(url, *args, **kwargs)
 778
 779
 780 def expand_path(s):
 781     """Expand shell variables and ~"""
 782     return os.path.expandvars(compat_expanduser(s))
 783
 784
 785 def orderedSet(iterable, *, lazy=False):
 786     """Remove all duplicates from the input iterable"""
 787     def _iter():
 788         seen = []  # Do not use set since the items can be unhashable
 789         for x in iterable:
 790             if x not in seen:
 791                 seen.append(x)
 792                 yield x
 793
 794     return _iter() if lazy else list(_iter())
 795
 796
 797 def _htmlentity_transform(entity_with_semicolon):
 798     """Transforms an HTML entity to a character."""
 799     entity = entity_with_semicolon[:-1]
 800
 801     # Known non-numeric HTML entity
 802     if entity in html.entities.name2codepoint:
 803         return chr(html.entities.name2codepoint[entity])
 804
 805     # TODO: HTML5 allows entities without a semicolon.
 806     # E.g. '&Eacuteric' should be decoded as 'Éric'.
 807     if entity_with_semicolon in html.entities.html5:
 808         return html.entities.html5[entity_with_semicolon]
 809
 810     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 811     if mobj is not None:
 812         numstr = mobj.group(1)
 813         if numstr.startswith('x'):
 814             base = 16
 815             numstr = '0%s' % numstr
 816         else:
 817             base = 10
 818         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 819         with contextlib.suppress(ValueError):
 820             return chr(int(numstr, base))
 821
 822     # Unknown entity in name, return its literal representation
 823     return '&%s;' % entity
 824
 825
 826 def unescapeHTML(s):
 827     if s is None:
 828         return None
 829     assert isinstance(s, str)
 830
 831     return re.sub(
 832         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 833
 834
 835 def escapeHTML(text):
 836     return (
 837         text
 838         .replace('&', '&amp;')
 839         .replace('<', '&lt;')
 840         .replace('>', '&gt;')
 841         .replace('"', '&quot;')
 842         .replace("'", '&#39;')
 843     )
 844
 845
 846 def process_communicate_or_kill(p, *args, **kwargs):
 847     deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
 848                         f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
 849     return Popen.communicate_or_kill(p, *args, **kwargs)
 850
 851
 852 class Popen(subprocess.Popen):
 853     if sys.platform == 'win32':
 854         _startupinfo = subprocess.STARTUPINFO()
 855         _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 856     else:
 857         _startupinfo = None
 858
 859     @staticmethod
 860     def _fix_pyinstaller_ld_path(env):
 861         """Restore LD_LIBRARY_PATH when using PyInstaller
 862             Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
 863                  https://github.com/yt-dlp/yt-dlp/issues/4573
 864         """
 865         if not hasattr(sys, '_MEIPASS'):
 866             return
 867
 868         def _fix(key):
 869             orig = env.get(f'{key}_ORIG')
 870             if orig is None:
 871                 env.pop(key, None)
 872             else:
 873                 env[key] = orig
 874
 875         _fix('LD_LIBRARY_PATH')  # Linux
 876         _fix('DYLD_LIBRARY_PATH')  # macOS
 877
 878     def __init__(self, *args, env=None, text=False, **kwargs):
 879         if env is None:
 880             env = os.environ.copy()
 881         self._fix_pyinstaller_ld_path(env)
 882
 883         if text is True:
 884             kwargs['universal_newlines'] = True  # For 3.6 compatibility
 885             kwargs.setdefault('encoding', 'utf-8')
 886             kwargs.setdefault('errors', 'replace')
 887         super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
 888
 889     def communicate_or_kill(self, *args, **kwargs):
 890         try:
 891             return self.communicate(*args, **kwargs)
 892         except BaseException:  # Including KeyboardInterrupt
 893             self.kill(timeout=None)
 894             raise
 895
 896     def kill(self, *, timeout=0):
 897         super().kill()
 898         if timeout != 0:
 899             self.wait(timeout=timeout)
 900
 901     @classmethod
 902     def run(cls, *args, timeout=None, **kwargs):
 903         with cls(*args, **kwargs) as proc:
 904             default = '' if proc.text_mode else b''
 905             stdout, stderr = proc.communicate_or_kill(timeout=timeout)
 906             return stdout or default, stderr or default, proc.returncode
 907
 908
 909 def get_subprocess_encoding():
 910     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 911         # For subprocess calls, encode with locale encoding
 912         # Refer to http://stackoverflow.com/a/9951851/35070
 913         encoding = preferredencoding()
 914     else:
 915         encoding = sys.getfilesystemencoding()
 916     if encoding is None:
 917         encoding = 'utf-8'
 918     return encoding
 919
 920
 921 def encodeFilename(s, for_subprocess=False):
 922     assert isinstance(s, str)
 923     return s
 924
 925
 926 def decodeFilename(b, for_subprocess=False):
 927     return b
 928
 929
 930 def encodeArgument(s):
 931     # Legacy code that uses byte strings
 932     # Uncomment the following line after fixing all post processors
 933     # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
 934     return s if isinstance(s, str) else s.decode('ascii')
 935
 936
 937 def decodeArgument(b):
 938     return b
 939
 940
 941 def decodeOption(optval):
 942     if optval is None:
 943         return optval
 944     if isinstance(optval, bytes):
 945         optval = optval.decode(preferredencoding())
 946
 947     assert isinstance(optval, str)
 948     return optval
 949
 950
 951 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 952
 953
 954 def timetuple_from_msec(msec):
 955     secs, msec = divmod(msec, 1000)
 956     mins, secs = divmod(secs, 60)
 957     hrs, mins = divmod(mins, 60)
 958     return _timetuple(hrs, mins, secs, msec)
 959
 960
 961 def formatSeconds(secs, delim=':', msec=False):
 962     time = timetuple_from_msec(secs * 1000)
 963     if time.hours:
 964         ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 965     elif time.minutes:
 966         ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
 967     else:
 968         ret = '%d' % time.seconds
 969     return '%s.%03d' % (ret, time.milliseconds) if msec else ret
 970
 971
 972 def _ssl_load_windows_store_certs(ssl_context, storename):
 973     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
 974     try:
 975         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
 976                  if encoding == 'x509_asn' and (
 977                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
 978     except PermissionError:
 979         return
 980     for cert in certs:
 981         with contextlib.suppress(ssl.SSLError):
 982             ssl_context.load_verify_locations(cadata=cert)
 983
 984
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler around an SSLContext configured from `params`.

    Keys read from `params`: 'nocheckcertificate', 'legacyserverconnect',
    'compat_opts', 'client_certificate', 'client_certificate_key' and
    'client_certificate_password'. `kwargs` are forwarded to the handler.

    Raises YoutubeDLError if the client certificate cannot be loaded.
    """
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    # Set before verify_mode: the ssl module refuses CERT_NONE while check_hostname is enabled
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')
    elif (
        sys.version_info < (3, 10)
        and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1)
        and not ssl.OPENSSL_VERSION.startswith('LibreSSL')
    ):
        # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1].
        # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting
        # in some situations [2][3].
        # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely
        # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe.
        # LibreSSL is excluded until further investigation due to cipher support issues [5][6].
        # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536
        # 2. https://github.com/yt-dlp/yt-dlp/issues/4627
        # 3. https://github.com/yt-dlp/yt-dlp/pull/5294
        # 4. https://peps.python.org/pep-0644/
        # 5. https://peps.python.org/pep-0644/#libressl-support
        # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368
        context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
        context.minimum_version = ssl.TLSVersion.TLSv1_2

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        # The certifi bundle is used unless the 'no-certifi' compat option is given
        # (has_certifi is defined elsewhere; presumably it tells whether certifi is installed)
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
1046
1047
def bug_reports_message(before=';'):
    """Return the "please report this issue" text, to be appended after `before`.

    The text is capitalized when `before` (ignoring trailing whitespace) is
    empty or ends a sentence. A non-empty `before` is prepended, separated
    from the text by a single space.
    """
    from .update import REPOSITORY

    before = before.rstrip()
    msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')

    starts_new_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_new_sentence:
        msg = msg[0].title() + msg[1:]

    if not before:
        return msg
    return before + ' ' + msg
1059
1060
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The message is, in order of preference: the `msg` argument, the `msg`
    class attribute of the (sub)class, or the name of the class.
    """
    msg = None

    def __init__(self, msg=None):
        if msg is None:
            if self.msg is None:
                self.msg = type(self).__name__
        else:
            self.msg = msg
        super().__init__(self.msg)
1071
1072
# Exception classes that are treated as network errors
network_exceptions = (urllib.error.URLError, http.client.HTTPException, socket.error)
if hasattr(ssl, 'CertificateError'):
    network_exceptions += (ssl.CertificateError,)
1077
1078
class ExtractorError(YoutubeDLError):
    """Error during info extraction.

    The final message is assembled as `[ie] video_id: msg (caused by cause)`,
    followed by the bug report hint unless the error is expected. The bare
    message is kept in `orig_msg`.
    """

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # An error raised while one of the network exceptions is being handled
        # is always expected. Note that `in` only matches the listed classes
        # themselves, not their subclasses
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        if isinstance(self.exc_info[1], ExtractorError):
            # Re-raised from another ExtractorError: keep the innermost exception info
            self.exc_info = self.exc_info[1].exc_info

        # Join the stringified message, so that a non-str `msg` (e.g. an
        # exception instance) does not make str.join() raise TypeError
        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            self.orig_msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        """Return the formatted traceback of `tb` and/or `cause`, or None if there is neither."""
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None
1111
1112
class UnsupportedError(ExtractorError):
    """Expected extraction error for an unsupported URL; the URL is kept in `url`."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super().__init__(message, expected=True)
        self.url = url
1118
1119
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match"""
1123
1124
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    May be raised when a website does not make a video available in the
    user's geographic location. The error is always marked as expected;
    `countries` is stored on the instance as given.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Any `expected` value passed by the caller is overridden
        super().__init__(msg, **{**kwargs, 'expected': True})
        self.countries = countries
1136
1137
class UserNotLive(ExtractorError):
    """Expected extraction error for a channel/user that is not live"""

    def __init__(self, msg=None, **kwargs):
        # Any `expected` value passed by the caller is overridden
        kwargs['expected'] = True
        message = msg or 'The channel is not currently live'
        super().__init__(message, **kwargs)
1144
1145
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this exception, carrying the
    appropriate error message, when they are not configured to continue
    on errors.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super().__init__(msg)
1158
1159
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry cannot be found in the
    info_dict of a playlist.
    """
    msg = 'Entry not found in info'
1167
1168
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that several files
    would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        # The file name, if known, is appended to the generic message
        message = self.msg if filename is None else f'{self.msg}: {filename}'
        super().__init__(message)
1181
1182
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to signal
    that the postprocessing task failed.
    """
1189
1190
class DownloadCancelled(YoutubeDLError):
    """ Raised to interrupt the download queue """
    msg = 'The download was cancelled'
1194
1195
class ExistingVideoReached(DownloadCancelled):
    """ Cancellation triggered by --break-on-existing """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1199
1200
class RejectedVideoReached(DownloadCancelled):
    """ Cancellation triggered by --break-on-reject """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1204
1205
class MaxDownloadsReached(DownloadCancelled):
    """ Cancellation triggered by reaching the --max-downloads limit """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1209
1210
class ReExtractInfo(YoutubeDLError):
    """ Signals that the video info has to be extracted again. """

    def __init__(self, msg, expected=False):
        self.expected = expected
        super().__init__(msg)
1217
1218
class ThrottledDownload(ReExtractInfo):
    """ Raised when the download speed is below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        # Always uses the class message and is never marked as expected
        super().__init__(self.msg, expected=False)
1225
1226
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        # Details about the failure, if given, are appended to the generic message
        message = self.msg if err is None else f'{self.msg}: {err}'
        super().__init__(message)
1239
1240
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this exception when a downloaded file
    is smaller than what the server announced first, which indicates that
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are in bytes
        self.downloaded, self.expected = downloaded, expected
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1254
1255
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an error `code` and message, classified into a `reason`.

    `reason` is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', derived from
    the errno value in `code` or from well-known phrases in `msg`.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        no_space_phrases = ('No space left', 'Disk quota exceeded')
        if code in (errno.ENOSPC, errno.EDQUOT) or any(phrase in msg for phrase in no_space_phrases):
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
1270
1271
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDLError subclass without any behavior of its own"""
1274
1275
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an `http_class(*args, **kwargs)` connection, honouring the 'source_address' param.

    If `ydl_handler._params` has a 'source_address', the connection is set up
    to bind to it and to only connect to remote addresses of the same IP
    family. `is_https` is not used by this function.
    """
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        # NOTE: the `source_address` parameter below shadows the outer variable; it is
        # indexed and passed to bind(), so it is presumably the (address, port) tuple
        # stored in `hc.source_address` further down - confirm against http.client
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A '.' in the source address means IPv4, anything else is taken as IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the candidates in order; the first successful connection is returned
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    # Remember the failure and close the socket before trying the next address
                    err = _
                    if sock is not None:
                        sock.close()
            # Every candidate failed: re-raise the error of the last attempt
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        # Only override the hook on connection objects that already have one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc
1321
1322
def handle_youtubedl_headers(headers):
    """Apply the internal "Youtubedl-no-compression" header.

    If that header is present, a new dict is returned that contains neither
    it nor any "Accept-Encoding" header (matched case-insensitively).
    Otherwise `headers` itself is returned. The input is never modified.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    return {
        name: value for name, value in headers.items()
        if name != marker and name.lower() != 'accept-encoding'
    }
1331
1332
class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep `params` in `self._params`; the remaining arguments go to HTTPHandler."""
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req`, through a SOCKS proxy if it carries a "Ytdl-socks-proxy" header."""
        conn_class = http.client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # Internal header: removed from the request before it is opened
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate `data`: first as a raw stream, then as a zlib-wrapped one."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        """Decompress brotli `data`; empty data is returned as is."""
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        """Prepare `req`: escape its URL, add default headers, apply the internal headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Headers already present on the request take precedence over the defaults
        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        """Decode a gzip/deflate/br response body and percent-encode a redirect Location.

        Every replacement response is built with the header object of the
        original response, so removing "Content-encoding" from one of them
        removes it for all.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    # No truncation helped: report the original error
                    raise original_ioerror
            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = urllib.request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS requests and responses get the same processing
    https_request = http_request
    https_response = http_response
1461
1462
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of `base_class` that connects through a SOCKS proxy.

    @param base_class   http.client.HTTPConnection or HTTPSConnection (or a subclass)
    @param socks_proxy  Proxy URL. Supported schemes are socks5, socks4a and
                        socks4 (or just socks); the port defaults to 1080.
                        Username and password, if any, are percent-decoded.
    @returns            The connection class
    @raises ValueError  If the scheme of `socks_proxy` is not supported
    """
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError(f'Unsupported SOCKS proxy scheme: {scheme!r}')

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, TLS is set up on top of the proxied connection
            if isinstance(self, http.client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
1504
1505
class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    """HTTPS handler with SOCKS proxy support and a configurable connection class."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Keep `params` and the connection class (default: http.client.HTTPSConnection).

        The remaining arguments are passed on to HTTPSHandler.
        """
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        self._https_conn_class = https_conn_class or http.client.HTTPSConnection

    def https_open(self, req):
        """Open `req`, through a SOCKS proxy if it carries a "Ytdl-socks-proxy" header.

        A URLError caused by an SSLV3_ALERT_HANDSHAKE_FAILURE is turned into a
        YoutubeDLError that suggests --legacy-server-connect.
        """
        # Pass on whichever of these attributes the base handler has set
        # (`_context`: python > 2.6, `_check_hostname`: python 3.x)
        open_kwargs = {
            name: getattr(self, f'_{name}')
            for name in ('context', 'check_hostname')
            if hasattr(self, f'_{name}')
        }

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(_create_http_connection, self, conn_class, True)
        try:
            return self.do_open(connection_factory, req, **open_kwargs)
        except urllib.error.URLError as e:
            failure = e.reason
            if (isinstance(failure, ssl.SSLError)
                    and getattr(failure, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise
1534
1535
def is_path_like(f):
    """Whether `f` is a str, bytes or os.PathLike object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1538
1539
1540 class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1541     """
1542     See [1] for cookie file format.
1543
1544     1. https://curl.haxx.se/docs/http-cookies.html
1545     """
1546     _HTTPONLY_PREFIX = '#HttpOnly_'
1547     _ENTRY_LEN = 7
1548     _HEADER = '''# Netscape HTTP Cookie File
1549 # This file is generated by yt-dlp.  Do not edit.
1550
1551 '''
1552     _CookieFileEntry = collections.namedtuple(
1553         'CookieFileEntry',
1554         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1555
1556     def __init__(self, filename=None, *args, **kwargs):
1557         super().__init__(None, *args, **kwargs)
1558         if is_path_like(filename):
1559             filename = os.fspath(filename)
1560         self.filename = filename
1561
1562     @staticmethod
1563     def _true_or_false(cndn):
1564         return 'TRUE' if cndn else 'FALSE'
1565
1566     @contextlib.contextmanager
1567     def open(self, file, *, write=False):
1568         if is_path_like(file):
1569             with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1570                 yield f
1571         else:
1572             if write:
1573                 file.truncate(0)
1574             yield file
1575
1576     def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1577         now = time.time()
1578         for cookie in self:
1579             if (not ignore_discard and cookie.discard
1580                     or not ignore_expires and cookie.is_expired(now)):
1581                 continue
1582             name, value = cookie.name, cookie.value
1583             if value is None:
1584                 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1585                 # with no name, whereas http.cookiejar regards it as a
1586                 # cookie with no value.
1587                 name, value = '', name
1588             f.write('%s\n' % '\t'.join((
1589                 cookie.domain,
1590                 self._true_or_false(cookie.domain.startswith('.')),
1591                 cookie.path,
1592                 self._true_or_false(cookie.secure),
1593                 str_or_none(cookie.expires, default=''),
1594                 name, value
1595             )))
1596
1597     def save(self, filename=None, *args, **kwargs):
1598         """
1599         Save cookies to a file.
1600         Code is taken from CPython 3.6
1601         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1602
1603         if filename is None:
1604             if self.filename is not None:
1605                 filename = self.filename
1606             else:
1607                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1608
1609         # Store session cookies with `expires` set to 0 instead of an empty string
1610         for cookie in self:
1611             if cookie.expires is None:
1612                 cookie.expires = 0
1613
1614         with self.open(filename, write=True) as f:
1615             f.write(self._HEADER)
1616             self._really_save(f, *args, **kwargs)
1617
1618     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1619         """Load cookies from a file."""
1620         if filename is None:
1621             if self.filename is not None:
1622                 filename = self.filename
1623             else:
1624                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1625
1626         def prepare_line(line):
1627             if line.startswith(self._HTTPONLY_PREFIX):
1628                 line = line[len(self._HTTPONLY_PREFIX):]
1629             # comments and empty lines are fine
1630             if line.startswith('#') or not line.strip():
1631                 return line
1632             cookie_list = line.split('\t')
1633             if len(cookie_list) != self._ENTRY_LEN:
1634                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1635             cookie = self._CookieFileEntry(*cookie_list)
1636             if cookie.expires_at and not cookie.expires_at.isdigit():
1637                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1638             return line
1639
1640         cf = io.StringIO()
1641         with self.open(filename) as f:
1642             for line in f:
1643                 try:
1644                     cf.write(prepare_line(line))
1645                 except http.cookiejar.LoadError as e:
1646                     if f'{line.strip()} '[0] in '[{"':
1647                         raise http.cookiejar.LoadError(
1648                             'Cookies file must be Netscape formatted, not JSON. See  '
1649                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1650                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1651                     continue
1652         cf.seek(0)
1653         self._really_load(cf, filename, ignore_discard, ignore_expires)
1654         # Session cookies are denoted by either `expires` field set to
1655         # an empty string or 0. MozillaCookieJar only recognizes the former
1656         # (see [1]). So we need force the latter to be recognized as session
1657         # cookies on our own.
1658         # Session cookies may be important for cookies-based authentication,
1659         # e.g. usually, when user does not check 'Remember me' check box while
1660         # logging in on a site, some important cookies are stored as session
1661         # cookies so that not recognizing them will result in failed login.
1662         # 1. https://bugs.python.org/issue17164
1663         for cookie in self:
1664             # Treat `expires=0` cookies as session cookies
1665             if cookie.expires == 0:
1666                 cookie.expires = None
1667                 cookie.discard = True
1668
1669
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    """HTTPCookieProcessor whose HTTPS hooks are the same callables as the HTTP ones"""

    def __init__(self, cookiejar=None):
        super().__init__(cookiejar)

    def http_response(self, request, response):
        return super().http_response(request, response)

    # HTTPS traffic gets exactly the same cookie handling as HTTP
    https_request = urllib.request.HTTPCookieProcessor.http_request
    https_response = http_response
1679
1680
class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    Compared to the base class, this handler:
     - routes 301, 303, 307 and 308 [2] responses (308 is used by some
       sites [3]) through the same code path as 302
     - turns redirected POST requests into GET for 301, 302 and 303
     - drops the content headers of the original request

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        Called by the http_error_30x methods when a redirection response
        is received. Returns the new Request to follow the redirect with,
        or raises HTTPError when this method/status code combination
        must not be redirected.
        """
        method = req.get_method()
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        safe_redirect = code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD")
        post_redirect = code in (301, 302, 303) and method == "POST"
        if not safe_redirect and not post_redirect:
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request carries no body, so its content headers are dropped
        content_headers = ("content-length", "content-type")
        newheaders = {}
        for key, value in req.headers.items():
            if key.lower() not in content_headers:
                newheaders[key] = value

        if code == 303 and method != 'HEAD':
            # A 303 must either use GET or HEAD for subsequent request
            # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
            method = 'GET'
        elif code in (301, 302) and method == 'POST':
            # 301 and 302 redirects are commonly turned into a GET from a POST
            # for subsequent requests by browsers, so we'll do the same.
            # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
            # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
            method = 'GET'

        return urllib.request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True, method=method)
1741
1742
def extract_timezone(date_str):
    """
    Split a trailing timezone off a date string

    @returns    (offset, date_str) where offset is a datetime.timedelta and
                date_str has the recognized timezone removed. The offset is
                zero when no timezone could be recognized.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                   [ ]?                                          # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)

    if m:
        # Numeric offset or a plain "Z"
        remainder = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            return datetime.timedelta(), remainder
        direction = 1 if m.group('sign') == '+' else -1
        offset = datetime.timedelta(
            hours=direction * int(m.group('hours')),
            minutes=direction * int(m.group('minutes')))
        return offset, remainder

    # Fall back to a named timezone following a time of day
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    named_offset = TIMEZONE_NAMES.get(m and m.group('tz').strip())
    if named_offset is not None:
        date_str = date_str[:-len(m.group('tz'))]
    return datetime.timedelta(hours=named_offset or 0), date_str
1771
1772
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """
    Return a UNIX timestamp from the given date

    @param delimiter    Separator between the date and the time part
    @param timezone     datetime.timedelta offset of date_str; when None it is
                        extracted from date_str itself
    @returns            The timestamp, or None if date_str is None or does not
                        match the expected format
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the parsed format
    stripped = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, stripped = extract_timezone(stripped)

    try:
        parsed = datetime.datetime.strptime(stripped, f'%Y-%m-%d{delimiter}%H:%M:%S')
        return calendar.timegm((parsed - timezone).timetuple())
    except ValueError:
        return None
1788
1789
def date_formats(day_first=True):
    """Return the day-first date formats if day_first is truthy, else the month-first ones"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1792
1793
def unified_strdate(date_str, day_first=True):
    """
    Return a string with the date in the format YYYYMMDD

    @param day_first    Selects which set of date formats is tried
    @returns            The formatted date, or None if date_str is None or
                        could not be parsed
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    _, cleaned = extract_timezone(cleaned)

    result = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to RFC 2822 style dates
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            with contextlib.suppress(ValueError):
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')

    return None if result is None else str(result)
1816
1817
def unified_timestamp(date_str, day_first=True):
    """
    Return a UNIX timestamp parsed from a free-form date string

    @param day_first    Selects which set of date formats is tried
    @returns            The timestamp, or None if date_str is None or could
                        not be parsed
    """
    if date_str is None:
        return None

    # Drop separators and weekday names (all seven days, with or without
    # the "day" suffix), then collapse the whitespace left behind
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))

    # NOTE(review): 12 hours are added for any "PM", including "12:xx PM" - confirm intended
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # The first format that parses wins
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())

    # Fall back to RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
1849
1850
def determine_ext(url, default_ext='unknown_video'):
    """
    Guess the file extension from a URL

    @param default_ext  Returned when no extension can be determined
    """
    if url is None or '.' not in url:
        return default_ext

    # Text after the last dot of the part preceding the query string
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
1862
1863
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>" """
    subtitle_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, subtitle_ext, expected_real_ext)
1866
1867
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.utcnow(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is None:
        # No relative part: date_str is a plain DATE
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    start_time = datetime_from_str(match.group('start'), precision, format)
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')

    if unit in ('month', 'year'):
        # timedelta has no month/year units, so these are applied calendar-wise
        new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        new_date = start_time + datetime.timedelta(**{unit + 's': amount})

    return datetime_round(new_date, unit) if auto_precision else new_date
1908
1909
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if re.fullmatch(allowed, date_str) is None:
            raise ValueError(f'Invalid date format "{date_str}"')
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
1920
1921
def datetime_add_months(dt, months):
    """
    Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so that divmod yields the year carry
    year_carry, month_index = divmod(dt.month + months - 1, 12)
    year = dt.year + year_carry
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]
    return dt.replace(year, month, min(dt.day, last_day))
1929
1930
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    @param precision    microsecond|second|minute|hour|day
                        "microsecond" returns dt unchanged
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {'day': 86400, 'hour': 3600, 'minute': 60, 'second': 1}
    timestamp = calendar.timegm(dt.timetuple())
    step = seconds_per_unit[precision]
    # Round half up to the nearest multiple of the unit
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
1947
1948
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings in any other format are returned unchanged"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    year, month, day = match.groups()
    return f'{year}-{month}-{day}'
1957
1958
class DateRange:
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """
        start and end must be strings in the format accepted by date

        A missing start/end leaves the range open towards the earliest/latest
        representable date.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start, strict=True)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end, strict=True)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            # Anything that is not already a date is parsed as a date string
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))

    def __eq__(self, other):
        if not isinstance(other, DateRange):
            return False
        return self.start == other.start and self.end == other.end
1992
1993
def platform_name():
    """ Returns the platform name as a str (deprecated; emits a deprecation warning) """
    message = f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead'
    deprecation_warning(message)
    return platform.platform()
1998
1999
@functools.cache
def system_identifier():
    """
    Return a one-line description of the running system

    Covers the Python version and implementation, machine, architecture,
    platform, OpenSSL version and, when available, the libc version.
    """
    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]

    try:
        libc_ver = platform.libc_ver()
    except OSError:  # We may not have access to the executable
        libc_ver = []

    details = (
        platform.python_version(),
        implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
    return 'Python %s (%s %s %s) - %s (%s%s)' % details
2018
2019
@functools.cache
def get_windows_version():
    ''' Get Windows version. returns () if it's not running on Windows '''
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
2027
2028
def write_string(s, out=None, encoding=None):
    """
    Write a string to a stream and flush it

    @param out          Stream to write to; defaults to sys.stderr
    @param encoding     Encoding used when bytes have to be written; when not
                        given, the stream's or the preferred encoding is used
    """
    assert isinstance(s, str)
    stream = out or sys.stderr

    text = s
    if compat_os_name == 'nt' and supports_terminal_sequences(stream):
        text = re.sub(r'([\r\n]+)', r' \1', text)

    if 'b' in getattr(stream, 'mode', ''):
        # Binary stream: encode and write to the stream itself
        target = stream
        enc = encoding or preferredencoding()
    elif hasattr(stream, 'buffer'):
        # Text stream wrapping a binary buffer: encode and write to the buffer
        target = stream.buffer
        enc = encoding or getattr(stream, 'encoding', None) or preferredencoding()
    else:
        # Anything else receives the str as is
        target, enc = stream, None

    payload = text.encode(enc, 'ignore') if enc else text
    target.write(payload)
    stream.flush()
2045
2046
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """
    Report the use of a deprecated feature

    Outside of the CLI a DeprecationWarning is issued. Within the CLI each
    distinct message is reported only once, through printer if given and
    through write_string otherwise; kwargs are passed on to that function.
    """
    from . import _IN_CLI
    if not _IN_CLI:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
        return

    already_reported = deprecation_warning._cache
    if msg in already_reported:
        return
    already_reported.add(msg)

    report = f'{msg}{bug_reports_message()}'
    if printer:
        return printer(report, **kwargs)
    return write_string(f'ERROR: {report}\n', **kwargs)


deprecation_warning._cache = set()
2062
2063
def bytes_to_intlist(bs):
    """Return the items of bs as a list of ints; items that are not ints are converted with ord()"""
    if not bs:
        return []
    # Indexing bytes yields ints, indexing str yields 1-character strings
    if not isinstance(bs[0], int):
        return [ord(char) for char in bs]
    return list(bs)
2071
2072
def intlist_to_bytes(xs):
    """Pack a sequence of ints (each fitting an unsigned byte) into a bytes object"""
    if not xs:
        return b''
    packer = struct.Struct('%dB' % len(xs))
    return packer.pack(*xs)
2077
2078
class LockingUnsupportedError(OSError):
    """OSError carrying a fixed "File locking is not supported" message"""
    msg = 'File locking is not supported'

    def __init__(self):
        OSError.__init__(self, self.msg)
2084
2085
# Cross-platform file locking
# Defines `_lock_file(f, exclusive, block)` and `_unlock_file(f)`:
#  - on Windows through LockFileEx/UnlockFileEx
#  - elsewhere through fcntl (flock, falling back to lockf)
#  - raising LockingUnsupportedError when fcntl cannot be imported
if sys.platform == 'win32':
    import ctypes
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Size of the locked byte range, split into its low and high DWORD
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        """Lock file object f; raises BlockingIOError if the lock could not be taken"""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object since _unlock_file needs the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        # dwFlags: 0x2 = LOCKFILE_EXCLUSIVE_LOCK, 0x1 = LOCKFILE_FAIL_IMMEDIATELY
        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure"""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError(ctypes.GetLastError()))

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            """Lock file object f; raises BlockingIOError if a non-blocking lock is unavailable"""
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                # The lock is held by someone else; lockf would not help
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            """Release the lock taken by _lock_file"""
            try:
                fcntl.flock(f, fcntl.LOCK_UN)
            except OSError:
                fcntl.lockf(f, fcntl.LOCK_UN)

    except ImportError:

        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError()

        def _unlock_file(f):
            raise LockingUnsupportedError()
2171
2172
class locked_file:
    """
    File object that is locked while it is in use

    The file is opened on construction. Entering the context (or calling
    open()) takes the lock - shared for read modes, exclusive otherwise -
    and leaving it (or calling close()) releases the lock and closes the
    file. Unknown attributes are looked up on the underlying file object.

    @param filename     Path of the file to open
    @param mode         One of r, rb, a, ab, w, wb
    @param block        Whether to wait for the lock to become available
    @param encoding     Passed on to os.fdopen
    @raises NotImplementedError for any other mode
    """
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        fd = os.open(filename, flags, 0o666)
        try:
            self.f = os.fdopen(fd, mode, encoding=encoding)
        except Exception:
            # The descriptor may not have been taken over by a file object
            # (e.g. an encoding given for a binary mode), so do not leak it
            with contextlib.suppress(OSError):
                os.close(fd)
            raise

    def __enter__(self):
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            # Truncation is deferred until the lock is held
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    # As for a failed lock, do not leave the file open (and locked)
                    with contextlib.suppress(OSError):
                        self.__exit__()
                    raise
        return self

    def unlock(self):
        """Release the lock if it is held; the file stays open"""
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()

    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
2236
2237
@functools.cache
def get_filesystem_encoding():
    """Return the filesystem encoding reported by sys, or 'utf-8' if it reports None"""
    reported = sys.getfilesystemencoding()
    if reported is None:
        return 'utf-8'
    return reported
2242
2243
def shell_quote(args):
    """Return the arguments quoted for use in a shell and joined by spaces"""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
2253
2254
def smuggle_url(url, data):
    """
    Pass additional data in a URL for internal use.

    The data is JSON-encoded into the URL fragment. Data already smuggled in
    url is merged into `data` (which is updated in place) and takes precedence.
    """
    url, already_smuggled = unsmuggle_url(url, {})
    data.update(already_smuggled)
    payload = json.dumps(data)
    fragment = urllib.parse.urlencode({'__youtubedl_smuggle': payload})
    return '#'.join((url, fragment))
2263
2264
def unsmuggle_url(smug_url, default=None):
    """
    Extract data smuggled into a URL fragment

    @returns    (url, data) with the smuggled fragment removed from url, or
                (smug_url, default) if the URL carries no smuggled data
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    params = urllib.parse.parse_qs(fragment)
    return url, json.loads(params['__youtubedl_smuggle'][0])
2272
2273
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """
    Formats numbers with decimal suffixes like K, M, etc

    @param fmt      %-format applied to (scaled number, suffix)
    @param factor   Ratio between consecutive suffixes; with 1024 the binary
                    suffixes (Ki, Mi, ...) are used
    @returns        The formatted string, or None if num is not a number or is negative
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    if num == 0:
        exponent = 0
    else:
        # Clamp to the available suffixes. The lower bound matters for small
        # positive numbers: their logarithm is negative and a negative exponent
        # would select a suffix from the end of the list
        exponent = max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
2286
2287
def format_bytes(bytes):
    """Format a number of bytes with a binary suffix (e.g. KiB), or return 'N/A' if that is not possible"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
2290
2291
def lookup_unit_table(unit_table, s):
    """
    Parse a number followed by a unit from the start of s

    @param unit_table   Mapping of unit name to multiplier
    @returns            int(number * multiplier), or None if s does not start
                        with a number followed by a known unit
    """
    units_re = '|'.join(map(re.escape, unit_table))
    m = re.match(
        rf'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>{units_re})\b', s)
    if m is None:
        return None
    # A comma is treated as the decimal separator
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
2301
2302
def parse_filesize(s):
    """
    Parse a file size such as "1.5 MiB" into a number of bytes

    @returns    The size as int, or None if s is None or cannot be parsed
    """
    if s is None:
        return None

    # (prefix letter, decimal unit name, binary unit name), in order of magnitude
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        unit_table.update({
            f'{letter}iB': binary,
            f'{letter}B': decimal,
            f'{lower}B': binary,
            f'{letter}b': decimal,
            f'{lower}b': decimal,
            f'{decimal_name}bytes': decimal,
            f'{binary_name}bytes': binary,
        })

    return lookup_unit_table(unit_table, s)
2372
2373
def parse_count(s):
    """
    Parse a count such as "1,234" or "1.5M" into an int

    @returns    The count, or None if s is None or cannot be parsed
    """
    if s is None:
        return None

    # Drop a leading non-numeric word
    text = re.sub(r'^[^\d]+\s', '', s).strip()

    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
        'b': 1000 ** 3,
        'B': 1000 ** 3,
    }
    scaled = lookup_unit_table(multipliers, text)
    if scaled is not None:
        return scaled

    # A number followed by something that is not a known unit
    plain = re.match(r'([\d,.]+)(?:$|\s)', text)
    return str_to_int(plain.group(1)) if plain else None
2401
2402
def parse_resolution(s, *, lenient=False):
    """
    Extract a resolution from a string

    Recognizes, in this order, "<width>x<height>", "<height>p"/"<height>i"
    and "4k"/"8k".

    @param lenient  Also accept "<width>x<height>" directly adjacent to other
                    alphanumeric characters
    @returns        A dict with 'width' and/or 'height'; empty if nothing was found
    """
    if s is None:
        return {}

    dimensions_re = r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)'
    if not lenient:
        dimensions_re = r'(?<![a-zA-Z0-9])' + dimensions_re + r'(?![a-zA-Z0-9])'
    dimensions = re.search(dimensions_re, s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scan_lines = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    kilo = re.search(r'\b([48])[kK]\b', s)
    if kilo:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(kilo.group(1)) * 540}

    return {}
2426
2427
def parse_bitrate(s):
    """Return the number preceding "kbps" in s as an int, or None if s is not a str or has no such number"""
    if not isinstance(s, str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
2434
2435
def month_by_name(name, lang='en'):
    """
    Return the number of a month by its name in the given language

    Unknown languages fall back to the English names.
    Returns None if the name is not a known month.
    """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in month_names:
        return None
    return month_names.index(name) + 1
2445
2446
def month_by_abbreviation(abbrev):
    """ Return the 1-based number of the month whose English name starts
        with the three-letter `abbrev` (exact match), or None """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
2455
2456
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'

    An ampersand is left untouched when it already starts one of the five
    predefined entities (amp, lt, gt, apos, quot) or a numeric character
    reference. Numeric references may be up to 6 hexadecimal or 7 decimal
    digits long, which is enough to reach the highest code point
    (U+10FFFF = 1114111), so references above U+FFFF are not mangled.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
2463
2464
def setproctitle(title):
    """Best-effort attempt to rename the running process via prctl().

    Loads 'libc.so.6' through ctypes and calls prctl(PR_SET_NAME, title).
    Returns None in every case; the call is silently skipped when ctypes,
    the library or the prctl symbol is not available.

    @param title  The new name; must be a str
    """
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except (OSError, TypeError):
        # OSError: library not found (e.g. not a glibc system)
        # TypeError: kept for loaders that reject a str library name
        return

    # Initialising the buffer from the bytes makes it one byte longer than
    # the title and NUL-terminated. Sizing it to exactly len(title) leaves
    # no room for the terminator that prctl() expects on a C string
    buf = ctypes.create_string_buffer(title.encode())
    PR_SET_NAME = 15
    try:
        libc.prctl(PR_SET_NAME, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
2490
2491
def remove_start(s, start):
    """Return s without the leading `start`; s is returned unchanged when
    it is None or does not begin with `start`"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
2494
2495
def remove_end(s, end):
    """Return s without the trailing `end`; s is returned unchanged when
    it is None or does not finish with `end`"""
    if s is None or not s.endswith(end):
        return s
    # A positive stop index is used on purpose: with an empty `end`,
    # s[:-0] would be s[:0] and wipe the whole string
    return s[:len(s) - len(end)]
2498
2499
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s.

    s is returned unchanged when it is None, shorter than two characters,
    or not enclosed in the same quote character on both sides.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
2507
2508
def get_domain(url):
    """
    Return the network location of url with a leading "www." removed,
    or None if that leaves nothing.

    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None
2515
2516
def url_basename(url):
    """Return the last segment of the URL's path component.

    Leading and trailing slashes of the path are ignored; the result is
    an empty string when the path has no segments.
    """
    trimmed_path = urllib.parse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
2520
2521
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that
    occurs before any '?' or '#'.

    Raises AttributeError if url does not have that shape.
    """
    prefix = re.match(r'https?://[^?#]+/', url)
    return prefix.group(0)
2524
2525
def urljoin(base, path):
    """Join path onto base, tolerating bytes and invalid arguments.

    @param base  str or bytes; only consulted when path is relative, and
                 must then start with "http://", "https://" or "//"
    @param path  str or bytes; returned as is when it already starts with
                 "<scheme>://" or "//"
    @returns     The joined URL, or None when path is empty or not a
                 string, or when base is unusable
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not (isinstance(path, str) and path):
        return None
    # Already absolute (or protocol-relative): base is irrelevant
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode()
    base_is_usable = isinstance(base, str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return urllib.parse.urljoin(base, path)
2539
2540
class HEADRequest(urllib.request.Request):
    """urllib Request whose HTTP method is always reported as HEAD"""

    def get_method(self):
        # Fixed verb, irrespective of whether the request carries data
        return 'HEAD'
2544
2545
class PUTRequest(urllib.request.Request):
    """urllib Request whose HTTP method is always reported as PUT"""

    def get_method(self):
        # Fixed verb, irrespective of whether the request carries data
        return 'PUT'
2549
2550
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning `default` if that is not possible.

    @param scale     The integer is floor-divided by this
    @param default   Returned on ValueError, TypeError or OverflowError
    @param get_attr  If given (and v is not None), convert this attribute
                     of v instead of v itself
    @param invscale  The integer is multiplied by this before dividing
    """
    value = getattr(v, get_attr, None) if get_attr and v is not None else v
    try:
        return int(value) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
2558
2559
def str_or_none(v, default=None):
    """Return str(v), or `default` when v is None"""
    if v is None:
        return default
    return str(v)
2562
2563
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    ints are returned unchanged; for strings every ',', '.' and '+' is
    removed before the conversion; any other type gives None """
    if isinstance(int_str, int):
        return int_str
    if not isinstance(int_str, str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
2571
2572
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, returning `default` if that is not possible.

    The float is multiplied by `invscale` and divided by `scale`.
    `default` is returned for None and on ValueError or TypeError.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
2580
2581
def bool_or_none(v, default=None):
    """Return v if it is a real bool, else `default` (no coercion)"""
    if isinstance(v, bool):
        return v
    return default
2584
2585
def strip_or_none(v, default=None):
    """Return v.strip() if v is a str, else `default`"""
    if not isinstance(v, str):
        return default
    return v.strip()
2588
2589
def url_or_none(url):
    """Return the whitespace-stripped url if it looks like a media URL.

    Accepted are protocol-relative URLs ("//...") and URLs whose scheme is
    http(s), ftp(s), mms or one of the rtmp*/rtsp* variants matched by the
    pattern below. Everything else, including non-str input, gives None.
    """
    if not (url and isinstance(url, str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
2595
2596
def request_to_url(req):
    """Return the full URL of a urllib Request; any other value is
    passed through unchanged"""
    return req.get_full_url() if isinstance(req, urllib.request.Request) else req
2602
2603
def strftime_or_none(timestamp, date_format, default=None):
    """Format a timestamp with strftime, returning `default` on failure.

    @param timestamp    Either a unix timestamp (int/float, interpreted as
                        UTC) or a str in YYYYMMDD form
    @param date_format  strftime format. "%s" (seconds since the epoch) is
                        substituted manually so that it also works where
                        the platform strftime does not support it
    @param default      Returned when the timestamp has another type,
                        cannot be parsed, or is out of range
    """
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Adding a timedelta to the epoch avoids the platform's gmtime(),
            # which fromtimestamp() relies on and which may fail with
            # OSError/OverflowError (e.g. for negative values)
            datetime_object = (
                datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
                + datetime.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default

        def epoch_seconds(match):
            # Evaluated only if the format really contains %s, so formats
            # without it never depend on timestamp() succeeding
            return f'{match.group(1) or ""}{int(datetime_object.timestamp())}'

        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', epoch_seconds, date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        return default
2618
2619
def parse_duration(s):
    """Parse a textual duration and return it in seconds.

    Three notations are tried in order:
      1. clock style, "[[[D:]H:]M:]S[.fraction]",
      2. unit style, e.g. "3h 11m 53s" or ISO 8601-like "PT1H0.040S"
         (years, months and weeks are accepted but not counted),
      3. a single, possibly fractional, hours or minutes figure such as
         "2.5 hours" or "87 Min.".

    @returns  The number of seconds (a float whenever a notation matched),
              or None if s is not a str, is blank, or matches no notation
    """
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    clock_re = r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        '''
    units_re = r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$'''
    single_unit_re = r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$'

    # Components a notation does not define simply stay None
    fields = dict.fromkeys(('days', 'hours', 'mins', 'secs', 'ms'))
    for notation in (clock_re, units_re, single_unit_re):
        found = re.match(notation, s)
        if found:
            fields.update(found.groupdict())
            break
    else:
        return None

    fraction = fields['ms']
    if fraction:
        # Clock style also allows ':' in front of the fractional part
        fraction = fraction.replace(':', '.')
    return sum(float(part or 0) * mult for part, mult in (
        (fields['days'], 86400), (fields['hours'], 3600), (fields['mins'], 60),
        (fields['secs'], 1), (fraction, 1)))
2674
2675
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of the filename's current extension.

    If `expected_real_ext` is given and differs from the current extension
    (compared without the dot), `ext` is appended to the whole filename
    instead.
    """
    stem, current_ext = os.path.splitext(filename)
    if expected_real_ext and current_ext[1:] != expected_real_ext:
        return f'{filename}.{ext}'
    return f'{stem}.{ext}{current_ext}'
2682
2683
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the filename's current extension for `ext`.

    If `expected_real_ext` is given and differs from the current extension
    (compared without the dot), nothing is removed and `ext` is appended
    to the whole filename instead.
    """
    stem, current_ext = os.path.splitext(filename)
    keep_whole_name = expected_real_ext and current_ext[1:] != expected_real_ext
    return '{}.{}'.format(filename if keep_whole_name else stem, ext)
2689
2690
def check_executable(exe, args=[]):
    """ Try to run the given binary and return its name if that worked.

    args can be a list of arguments for a short output (like -version).
    False is returned when launching the binary raises OSError. """
    command = [exe] + args
    try:
        Popen.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    else:
        return exe
2699
2700
def _get_exe_version_output(exe, args, *, to_screen=None):
    """Run `exe` with `args` and return what it printed.

    stderr is merged into stdout. If given, `to_screen` is first called
    with a message showing the command. Returns False when launching the
    executable raises OSError.
    """
    if to_screen:
        to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        output, _, _ = Popen.run(
            [encodeArgument(exe)] + args, text=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        return False
    else:
        return output
2713
2714
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from a program's output.

    @param output        The text to search; must be a str
    @param version_re    Regex whose first group is the version. Defaults
                         to the token following the word "version"
    @param unrecognized  Returned when the regex does not match
    """
    assert isinstance(output, str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
2724
2725
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present (or printed nothing).

    The output of running `exe` with `args` is passed to
    detect_exe_version together with version_re and unrecognized """
    output = _get_exe_version_output(exe, args)
    if not output:
        return False
    return detect_exe_version(output, version_re, unrecognized)
2732
2733
def frange(start=0, stop=None, step=1):
    """Float range

    Like range(), but the arguments need not be integers. With a single
    argument it counts from 0 up to that value. A step of zero yields
    nothing."""
    if stop is None:
        start, stop = 0, start

    # Multiplying both sides by the direction lets one comparison
    # serve for ascending and descending ranges alike
    if not step:
        direction = 0
    elif step > 0:
        direction = 1
    else:
        direction = -1

    current = start
    while direction * current < direction * stop:
        yield current
        current += step
2742
2743
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only when they are needed and are
    kept in an internal cache, so each item is produced at most once.
    Operations that need to know the end of the sequence (len(), negative
    indices, open-ended slices, reversed iteration, repr()/str())
    consume the whole iterable."""

    class IndexError(IndexError):
        # Raised by __getitem__ instead of the builtin IndexError
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        # Source of the items that have not been fetched yet
        self._iterable = iter(iterable)
        # Items fetched so far, always in the order of the original iterable
        self._cache = [] if _cache is None else _cache
        # Whether this object presents the items in reverse order
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # First replay what is already cached, then continue fetching,
        # caching each new item as it is handed out
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        """Fetch all remaining items and return the cache (original order)"""
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        # The slice produces a new list, in reverse order if applicable
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        # ~x == -x - 1: the same position, counted from the other end
        return None if x is None else ~x

    def __getitem__(self, idx):
        # Translate the request into indices of the (unreversed) cache
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Number of items still missing from the cache to cover the
        # highest index that was requested
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Both branches resolve to index 0 of the underlying iterable,
        # so at most one item has to be fetched
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        # The length is only known once everything has been fetched
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # The new object shares this object's iterator and cache
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        # The copy shares this object's iterator and cache
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
2831
2832
class PagedList:
    """Base class for lists whose items are obtained one page at a time.

    `pagefunc(pagenum)` must return an iterable with the items of that
    page. Subclasses implement _getslice(start, end) to decide which
    pages are needed for a range of items.
    """

    class IndexError(IndexError):
        pass

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        # Pages numbered above this are treated as empty without fetching
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        """Return the items of the given page as a list"""
        results = self._cache.get(pagenum)
        if results is not None:
            pass
        elif pagenum > self._pagecount:
            results = []
        else:
            results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        """Return the items in [start, end) as a list"""
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        found = self.getslice(idx, idx + 1)
        if found:
            return found[0]
        raise self.IndexError()
2871
2872
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            # Item indices covered by this page: [page_first, page_limit)
            page_first = pagenum * self._pagesize
            page_limit = pagenum * self._pagesize + self._pagesize
            if page_limit <= start:
                continue

            # Offsets inside the page at which the requested range
            # starts and stops (None: up to the end of the page)
            lower = start % self._pagesize if page_first <= start < page_limit else 0
            upper = None
            if end is not None and page_first <= end <= page_limit:
                upper = ((end - 1) % self._pagesize) + 1

            try:
                items = self.getpage(pagenum)
            except Exception:
                # Remember that pages from this one onwards cannot be fetched
                self._pagecount = pagenum - 1
                raise
            if lower != 0 or upper is not None:
                items = items[lower:upper]
            yield from items

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            page_is_short = len(items) + lower < self._pagesize

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if page_is_short or end == page_limit:
                break
2912
2913
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        # Caching is always enabled for this kind of list
        super().__init__(pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        first_page, to_skip = divmod(start, self._pagesize)
        if end is None:
            page_limit, remaining = self._pagecount, None
        else:
            page_limit = min(self._pagecount, end // self._pagesize + 1)
            remaining = end - start

        for pagenum in range(first_page, page_limit):
            items = self.getpage(pagenum)
            if to_skip:
                # Only the first page has items in front of `start`
                items = items[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(items) >= remaining:
                    # This page completes the requested range
                    yield from items[:remaining]
                    break
                remaining -= len(items)
            yield from items
2938
2939
class PlaylistEntries:
    """Indexable view of the 'entries' of a playlist info dict.

    Indexing takes 1-based playlist positions and produces
    (position, entry) pairs; see __getitem__.
    """

    # Placeholder for positions whose entry is not available
    MissingEntry = object()
    # Set once the end of the entries is known to have been reached
    is_exhausted = False

    def __init__(self, ydl, info_dict):
        """
        @param ydl        Object providing params, report_warning, _match_entry
                          and _handle_extraction_exceptions
        @param info_dict  Dict with 'entries' and optionally 'requested_entries'
        Raises EntryNotInPlaylist if info_dict has no entries
        """
        self.ydl = ydl

        # _entries must be assigned now since infodict can change during iteration
        entries = info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        requested_entries = info_dict.get('requested_entries')
        self.is_incomplete = bool(requested_entries)
        if self.is_incomplete:
            assert self.is_exhausted
            # requested_entries holds the 1-based position of each item of
            # `entries`; every other position is filled with MissingEntry
            self._entries = [self.MissingEntry] * max(requested_entries)
            for i, entry in zip(requested_entries, entries):
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
        else:
            # Any other iterable is wrapped so that it can be indexed
            self._entries = LazyList(entries)

    # One comma-separated segment of a playlist items specification:
    # an optional start, optionally followed by ':' or '-', an optional end
    # (a number or "inf"/"infinite") and an optional ':step'
    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Yield a slice (for ranges) or an int (for single items) for each
        comma-separated segment of `string`.
        Raises ValueError for empty or malformed segments and for a zero step"""
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')
            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            if int_or_none(step) == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            # float_or_none is used for the end so that "inf" is accepted
            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)

    def get_requested_items(self):
        """Yield (position, entry) for the items selected by the
        playlist_items / playliststart / playlistend params of ydl"""
        playlist_items = self.ydl.params.get('playlist_items')
        playlist_start = self.ydl.params.get('playliststart', 1)
        playlist_end = self.ydl.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if not entry:
                    continue
                try:
                    # TODO: Add auto-generated fields
                    self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    # Stop producing items altogether
                    return

    def get_full_count(self):
        """Return the total number of entries if it can be determined from
        the conditions checked below; otherwise None is returned implicitly"""
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        elif isinstance(self._entries, InAdvancePagedList):
            if self._entries._pagesize == 1:
                # One entry per page: the page count is the entry count
                return self._entries._pagecount

    @functools.cached_property
    def _getter(self):
        """Function returning the entry at a 0-based index.
        It raises self.IndexError when the index is past the end"""
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    # This is the builtin IndexError raised by the list
                    entry = self.MissingEntry
                    if not self.is_incomplete:
                        raise self.IndexError()
                if entry is self.MissingEntry:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                return entry
        else:
            def get_entry(i):
                try:
                    # The lookup is routed through ydl's
                    # _handle_extraction_exceptions wrapper
                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
                except (LazyList.IndexError, PagedList.IndexError):
                    raise self.IndexError()
        return get_entry

    def __getitem__(self, idx):
        """Generator of (1-based position, entry) for a 1-based int or slice.
        Unlike for ordinary slices, the stop position is included"""
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            # Non-negative positions are 1-based; negative ones count from the end
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        # Move the bound one step further so that `stop` itself is included
        stop += [-1, 1][step > 0]

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                self.is_exhausted = True
                # Going forwards nothing more can follow; going backwards
                # lower indices may still exist
                if step > 0:
                    break
                continue
            yield i + 1, entry

    def __len__(self):
        # Counts the entries by iterating over all of them
        return len(tuple(self[:]))

    class IndexError(IndexError):
        pass
3072
3073
def uppercase_escape(s):
    """Replace every literal \\UXXXXXXXX sequence (8 hex digits) in s by
    the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
3080
3081
def lowercase_escape(s):
    """Replace every literal \\uXXXX sequence (4 hex digits) in s by the
    character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
3088
3089
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986

    The characters listed below are left as they are; this includes '%',
    so that existing percent-escapes are not escaped again"""
    unescaped = b"%/;:@&=+$,!~*'()?#[]"
    return urllib.parse.quote(s, unescaped)
3093
3094
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host part is IDNA-encoded; path, params, query and fragment
    are passed through escape_rfc3986"""
    parts = urllib.parse.urlparse(url)
    ascii_netloc = parts.netloc.encode('idna').decode('ascii')
    escaped = {
        component: escape_rfc3986(getattr(parts, component))
        for component in ('path', 'params', 'query', 'fragment')
    }
    return parts._replace(netloc=ascii_netloc, **escaped).geturl()
3105
3106
def parse_qs(url, **kwargs):
    """Parse the query string of url into a dict of value lists.

    Keyword arguments are forwarded to urllib.parse.parse_qs"""
    query_string = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query_string, **kwargs)
3109
3110
def read_batch_urls(batch_fd):
    """Read the URLs contained in a batch file, one per line.

    @param batch_fd  Iterable of str or bytes lines that has a close()
                     method; it is closed before returning
    @returns         List of URLs. Blank lines and lines starting with
                     '#', ';' or ']' are skipped, as are trailing
                     comments (a '#' following whitespace)
    """
    def fixup(url):
        if not isinstance(url, str):
            url = url.decode('utf-8', 'replace')
        # The BOM both as decoded by UTF-8 and as its three raw bytes
        # mapped to code points one-to-one
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        # (maxsplit is passed by keyword: positional use is deprecated)
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3128
3129
def urlencode_postdata(*args, **kargs):
    """Like urllib.parse.urlencode, but return ASCII-encoded bytes"""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
3132
3133
def update_url_query(url, query):
    """Return url with the given parameters set in its query string.

    @param query  Dict of parameters. A key that is already present in
                  the URL is replaced; list values produce one parameter
                  per item. If empty, url is returned as is
    """
    if not query:
        return url
    parts = urllib.parse.urlparse(url)
    params = urllib.parse.parse_qs(parts.query)
    params.update(query)
    new_query = urllib.parse.urlencode(params, True)
    return urllib.parse.urlunparse(parts._replace(query=new_query))
3142
3143
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Build a new request from `req` with some of its parts replaced.

    @param url      New URL; defaults to the URL of req
    @param data     New body; a falsy value keeps the body of req
    @param headers  Headers merged over a copy of those of req
    @param query    Query parameters applied to the URL via update_url_query
    @returns        A HEADRequest, PUTRequest or plain Request, according
                    to the method of req; a `timeout` attribute is carried
                    over if req has one
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers or {})
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)

    method = req.get_method()
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(method, urllib.request.Request)

    new_req = request_class(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3162
3163
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    @param data      Dict whose keys and values are str or bytes
    @param boundary  ASCII str used as the part delimiter
    @returns         (body bytes, Content-Type header value)
    Raises ValueError if the boundary occurs inside one of the parts
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes

    chunks = []
    for name, value in data.items():
        if isinstance(name, str):
            name = name.encode()
        if isinstance(value, str):
            value = value.encode()
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        chunks += [delimiter + b'\r\n', part]

    chunks.append(delimiter + b'--\r\n')
    return b''.join(chunks), content_type
3184
3185
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple of the encoded body and the Content-Type header value.
    ValueError is raised if a specified boundary occurs within the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A boundary chosen by the caller is not replaced on collision
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not occur in the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
3214
3215
def variadic(x, allowed_types=(str, bytes, dict)):
    """Return x itself if it is an iterable, else a 1-tuple holding x.

    Instances of `allowed_types` are iterable but are nevertheless
    treated as single values and get wrapped as well.
    """
    if isinstance(x, allowed_types) or not isinstance(x, collections.abc.Iterable):
        return (x,)
    return x
3218
3219
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value found in d under the given key(s).

    @param key_or_keys        A single key or an iterable of keys, tried
                              in order
    @param default            Returned when no key has a usable value
    @param skip_false_values  If true, falsy values are passed over as
                              well; None is always passed over
    """
    for key in variadic(key_or_keys):
        value = d.get(key)
        if value is None:
            continue
        if value or not skip_false_values:
            return value
    return default
3225
3226
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn and return the first acceptable result.

    Every function is called with *args and **kwargs. A function that
    raises AttributeError, KeyError, TypeError, IndexError, ValueError
    or ZeroDivisionError is skipped, as is a result that is not an
    instance of `expected_type` (if given). Returns None when no
    function qualifies.
    """
    for func in funcs:
        try:
            result = func(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
    return None
3236
3237
def try_get(src, getter, expected_type=None):
    """Apply the getter(s) to src via try_call and return its result.

    `getter` is a single callable or an iterable of callables, each
    taking src as its only argument.
    """
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)
3240
3241
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict with the items of dct for which cndn(key, value)
    is true; by default, the items whose value is not None"""
    kept = {}
    for key, value in dct.items():
        if cndn(key, value):
            kept[key] = value
    return kept
3244
3245
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to the earlier ones.

    A key takes the first value that is not None. The only value that
    can be replaced later on is an empty string, and only by a str.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if key not in merged:
                if value is not None:
                    merged[key] = value
            elif isinstance(value, str) and merged[key] == '':
                merged[key] = value
    return merged
3254
3255
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as a str.

    A str is returned unchanged; anything else is decoded with
    str(string, encoding, errors). The default encoding is the value
    preferredencoding() returned when this function was defined.
    """
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
3258
3259
# Age limit that parse_age_limit() returns for each US rating label
# (the label is looked up after upper-casing the input)
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
3267
3268
# Age limit that parse_age_limit() returns for each "TV-*" label.
# The part of each key after "TV-" is also used there to build the regex
# that recognises these labels, so every key must start with "TV-"
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
3277
3278
def parse_age_limit(s):
    """Convert an age rating to an age limit in years.

    Accepted are ints from 0 to 21, strings of one or two digits with an
    optional trailing '+', the labels in US_RATINGS and the labels in
    TV_PARENTAL_GUIDELINES (case-insensitive; "TV" may be followed by
    '-', '_' or nothing). Anything else gives None.
    """
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        return s if 0 <= s <= 21 else None
    if not isinstance(s, str):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    label = s.upper()
    if label in US_RATINGS:
        return US_RATINGS[label]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_label = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, label)
    return TV_PARENTAL_GUIDELINES['TV-' + tv_label.group(1)] if tv_label else None
3295
3296
def strip_jsonp(code):
    """Return the argument of a JSONP callback invocation.

    Handles an optional "window." prefix, the "cb && cb(...)" guard
    form, an optional trailing ';' and trailing "//" comments. Input
    that does not look like a callback invocation is returned unchanged.
    """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, lambda call: call.group('callback_data'), code)
3305
3306
def js_to_json(code, vars={}, *, strict=False):
    """ Rewrite JavaScript literal source text into JSON text

    The conversion is purely textual (regex substitutions); the result is not
    parsed or validated here.

    @param code    JavaScript source text
    @param vars    Dict of identifier -> value. A matched identifier found in it
                   is replaced by json.dumps(value). The dict is only read
    @param strict  If True, an identifier that cannot be resolved raises
                   ValueError, and the "new Date(...)" / "new X(...)" rewrites
                   are skipped. If False, unresolved identifiers are emitted as
                   JSON strings
    @returns       The rewritten text
    """
    # vars is a dict of var, val pairs to substitute
    STRING_QUOTES = '\'"'
    # A single- or double-quoted string literal, allowing backslash escapes
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    # A /* ... */ comment or a // comment up to and including the newline
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    # Optional whitespace and at most one comment
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    # (pattern, base) pairs for hexadecimal and octal integers, each optionally
    # followed by a colon (i.e. used as an object key)
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )

    def process_escape(match):
        # group(1) is an unescaped double quote, group(2) the character after a backslash
        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
        escape = match.group(1) or match.group(2)

        # Escapes JSON also knows are kept (a bare " becomes \"), \x is widened
        # to \u00, an escaped newline is dropped and any other escaped
        # character is emitted without its backslash
        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
                else R'\u00' if escape == 'x'
                else '' if escape == '\n'
                else escape)

    def fix_kv(m):
        # Replacement callback for every token matched by the final re.sub below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, runs of "!" and trailing commas are removed
            return ''

        if v[0] in STRING_QUOTES:
            # Re-quote the string literal with double quotes
            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
            return f'"{escaped}"'

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # An integer used as a key must become a quoted string in JSON
                return f'"{i}":' if v.endswith(':') else str(i)

        if v in vars:
            return json.dumps(vars[v])

        if not strict:
            return f'"{v}"'

        raise ValueError(f'Unknown value: {v}')

    def create_map(mobj):
        # Convert the array of pairs given to "new Map(...)" into a JSON object.
        # The recursive call does not forward `strict`
        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        # new Date("...") is replaced by its quoted argument; any other
        # "new X(...)" expression is emitted whole as a JSON string
        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)

    # Tokens handled, in order of the alternatives: string literals; comments
    # and commas directly before a closing bracket; "void 0" and identifiers;
    # hex/octal integers (optionally as keys); decimal integer keys; runs of "!"
    return re.sub(rf'''(?sx)
        {STRING_RE}|
        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
        [0-9]+(?={SKIP_RE}:)|
        !+
        ''', fix_kv, code)
3370
3371
def qualities(quality_ids):
    """ Build a function that ranks a quality id by its position in `quality_ids`

    The returned function gives the index of the id within `quality_ids`,
    or -1 if the id is not present.
    """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
3380
3381
# Stage names, in the order listed here. NOTE(review): presumably the points at
# which a postprocessor can be scheduled to run - confirm against the callers
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3383
3384
# Default %-style templates keyed by template type.
# NOTE(review): presumably these are output filename templates - confirm against the callers
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Template type -> a fixed string, or None when there is none.
# NOTE(review): the strings look like filename suffixes for that kind of file - confirm against the callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
3402
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# Verbose-mode regex template for one %-format specifier. The "{0}" and "{1}"
# placeholders stand for the mapping key pattern and the conversion type
# pattern (presumably filled in with str.format() by the callers).
# The leading "prefix" group captures any run of escaped "%%" before the specifier
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''


# The conversion type characters; presumably what gets substituted for "{1}" above
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3421
3422
def limit_length(s, length):
    """ Shorten `s` to at most `length` characters, marking the cut with "..."

    None is passed through and strings that already fit are returned unchanged.
    """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    # Leave room for the ellipsis itself within the allowed length
    keep = length - len(ELLIPSES)
    return s[:keep] + ELLIPSES
3431
3432
def version_tuple(v):
    """ Split a version string on "." and "-" and return its parts as a tuple of ints

    Raises ValueError if any part is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
3435
3436
def is_outdated_version(version, limit, assume_new=True):
    """ Tell whether `version` is older than `limit`

    @param assume_new  Decides the answer when `version` is empty or when either
                       version cannot be parsed: the version is then treated as
                       new (not outdated) if True, as outdated if False
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, minimum = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < minimum
3444
3445
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Imported here rather than at module level, as in the original
    from .update import is_non_updateable

    blocked = is_non_updateable()
    return not blocked
3452
3453
def args_to_str(args):
    """ Get a short string representation for a subprocess command

    Every argument is quoted with compat_shlex_quote and the results are
    joined with single spaces.
    """
    quoted = map(compat_shlex_quote, args)
    return ' '.join(quoted)
3457
3458
def error_to_compat_str(err):
    """ Return str(err) """
    message = str(err)
    return message
3461
3462
def error_to_str(err):
    """ Format an error as "<class name>: <message>" """
    kind = type(err).__name__
    return f'{kind}: {err}'
3465
3466
def mimetype2ext(mt):
    """ Guess a file extension from a MIME type

    Parameters after ";" are ignored. The type is looked up, in order, as a
    full "type/subtype" name, by its subtype, and by its structured-syntax
    suffix (the part after "+"). All three lookups are case-insensitive,
    since media type names are case-insensitive.

    @param mt  MIME type string (e.g. from a Content-Type header), or None
    @returns   The extension without a leading dot, or None if `mt` is None.
               For unknown types the subtype itself is returned (in its
               original case), with "+" replaced by "."
    """
    if mt is None:
        return None

    # Drop any parameters (e.g. '; codecs="..."') and surrounding whitespace
    mt = mt.partition(';')[0].strip()

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }

    ext = FULL_MAP.get(mt.lower())
    if ext is not None:
        return ext

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }

    _, _, subtype = mt.rpartition('/')
    normalized_subtype = subtype.lower()
    ext = SUBTYPE_MAP.get(normalized_subtype)
    if ext is not None:
        return ext

    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }

    _, _, suffix = normalized_subtype.partition('+')
    ext = SUFFIX_MAP.get(suffix)
    if ext is not None:
        return ext

    # Unknown type: fall back to the subtype as it was given
    return subtype.replace('+', '.')
3529
3530
def ext2mimetype(ext_or_url):
    """ Guess the MIME type for a bare extension, a filename or a URL

    A value without any "." is treated as a bare extension.
    @returns  The guessed MIME type, or None for empty input or an unknown type
    """
    if not ext_or_url:
        return None
    name = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    mime_type, _ = mimetypes.guess_type(name)
    return mime_type
3537
3538
def parse_codecs(codecs_str):
    """ Split a comma-separated codecs string into video/audio/subtitle codecs

    See http://tools.ietf.org/html/rfc6381

    Only the first codec of each kind is kept. Unknown codecs trigger a warning
    through write_string.
    @returns  {} for empty input. Otherwise, if any codec was recognised, a dict
              with "vcodec" and "acodec" ("none" when absent), "dynamic_range"
              ("DV", "HDR10" or None) and, only if present, "scodec".
              If nothing was recognised but exactly two codecs were given, they
              are taken as video and audio codec in that order; else {}
    """
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
                    'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    SUBTITLE_CODECS = ('stpp', 'wvtt')

    split_codecs = [
        codec for codec in map(str.strip, codecs_str.strip().strip(',').split(','))
        if codec]

    vcodec = acodec = scodec = hdr = None
    for full_codec in split_codecs:
        # Zeros directly followed by a digit are dropped before matching,
        # e.g. "av01" -> "av1" and "vp09.02" -> "vp9.2"
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        family = parts[0]
        if family in VIDEO_CODECS:
            if vcodec:
                continue
            vcodec = full_codec
            if family in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif family == 'av1' and traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif family in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        elif family in SUBTITLE_CODECS:
            if not scodec:
                scodec = full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')

    if not (vcodec or acodec or scodec):
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
        return {}

    result = {
        'vcodec': vcodec or 'none',
        'acodec': acodec or 'none',
        'dynamic_range': hdr,
    }
    if scodec is not None:
        result['scodec'] = scodec
    return result
3579
3580
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """ Pick a container extension able to hold the given streams

    @param vcodecs, acodecs  Codecs of the video/audio streams
    @param vexts, aexts      Extensions of the same streams; each list must be
                             as long as the matching codec list
    @param preferences       Optional sequence of acceptable extensions, tried
                             in order. "mkv" is always allowed when this is empty
    @returns                 The chosen extension. Falls back to "mkv" if it is
                             allowed, else to the last preference
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    # More than one video stream or more than one audio stream
    if allow_mkv and (len(acodecs) > 1 or len(vcodecs) > 1):
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # First codec only, reduced to the part before the first "." with all zeros removed
        return try_get(codecs, getter=lambda x: x[0].split('.')[0].replace('0', ''))

    wanted_codecs = (sanitize_codec(vcodecs), sanitize_codec(acodecs))

    # First attempt: choose by codec compatibility
    for ext in preferences or COMPATIBLE_CODECS.keys():
        if ext == 'mkv':
            return ext
        if COMPATIBLE_CODECS.get(ext, set()).issuperset(wanted_codecs):
            return ext

    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    # Second attempt: choose by the extensions of the streams
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext}:
            return ext
        if any(family.issuperset(current_exts) for family in COMPATIBLE_EXTS):
            return ext

    if allow_mkv:
        return 'mkv'
    return preferences[-1]
3619
3620
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension of a response from its headers

    The filename given in an 'attachment; filename="..."' Content-Disposition
    header is preferred; otherwise the Content-Type header is mapped with
    mimetype2ext.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = mobj and determine_ext(mobj.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'))
3633
3634
def encode_data_uri(data, mime_type):
    """ Build a base64 "data:" URI from the bytes `data` and the given MIME type """
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type!s};base64,{payload}'
3637
3638
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    @param content_limit  Minimum age required by the content; None means the
                          content is available for everyone
    @param age_limit      Age of the viewer; None means no limit is set
    """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
3647
3648
# List of known byte-order-marks (BOM), as (BOM bytes, codec name) pairs.
# is_html() tries them in this order, so the 4-byte UTF-32-LE mark has to stay
# ahead of the 2-byte UTF-16-LE mark, which is a prefix of it
BOMS = [
    (b'\xef\xbb\xbf', 'utf-8'),
    (b'\x00\x00\xfe\xff', 'utf-32-be'),
    (b'\xff\xfe\x00\x00', 'utf-32-le'),
    (b'\xff\xfe', 'utf-16-le'),
    (b'\xfe\xff', 'utf-16-be'),
]
3657
3658
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Leading byte-order-marks listed in BOMS are stripped and select the codec
    used for decoding (UTF-8 otherwise). The data counts as HTML if, after
    optional whitespace, it starts with "<".
    @returns  The match object if it looks like HTML, else None
    """
    encoding = 'utf-8'
    for bom, bom_encoding in BOMS:
        bom_length = len(bom)
        while first_bytes.startswith(bom):
            encoding = bom_encoding
            first_bytes = first_bytes[bom_length:]

    text = first_bytes.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
3668
3669
def determine_protocol(info_dict):
    """ Work out the download protocol for a format/info dict

    An explicit "protocol" entry wins. Otherwise the protocol is derived from
    the (sanitized) "url": its rtmp/mms/rtsp prefix, then its extension
    (m3u8 or f4m), and finally its URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8':
        if info_dict.get('is_live'):
            return 'm3u8'
        return 'm3u8_native'
    if ext == 'f4m':
        return 'f4m'

    return urllib.parse.urlparse(url).scheme
3690
3691
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a tab character will be right aligned

    @param header_row  List of column titles
    @param data        List of rows; every cell is converted with str()
    @param delim       If truthy, a string that is repeated to form a separator
                       line between the header and the data
    @param extra_gap   Number of spaces between columns, on top of the default one
    @param hide_empty  Drop the columns in which every data cell is empty
                       (the header is not considered)
    @returns           The table as a single string, lines joined by newlines
                       and without trailing whitespace
    """
    def width(string):
        # Visible width: terminal sequences and tabs do not count
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        # Widest cell of every column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep the cells whose entry in filterArray is truthy. Cells beyond the
        # end of filterArray are kept as well (fillvalue=True)
        return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]

    # A column whose widest data cell has width 0 is filtered out below.
    # With hide_empty unset the filter list is empty, so everything is kept
    max_lens = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, max_lens)
    data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
    # Pad the cells in place; the rows are the filtered copies made above
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                # The padding replaces the tab, pushing the text after it to the right edge
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
3722
3723
3724 def _match_one(filter_part, dct, incomplete):
3725     # TODO: Generalize code with YoutubeDL._build_format_filter
3726     STRING_OPERATORS = {
3727         '*=': operator.contains,
3728         '^=': lambda attr, value: attr.startswith(value),
3729         '$=': lambda attr, value: attr.endswith(value),
3730         '~=': lambda attr, value: re.search(value, attr),
3731     }
3732     COMPARISON_OPERATORS = {
3733         **STRING_OPERATORS,
3734         '<=': operator.le,  # "<=" must be defined above "<"
3735         '<': operator.lt,
3736         '>=': operator.ge,
3737         '>': operator.gt,
3738         '=': operator.eq,
3739     }
3740
3741     if isinstance(incomplete, bool):
3742         is_incomplete = lambda _: incomplete
3743     else:
3744         is_incomplete = lambda k: k in incomplete
3745
3746     operator_rex = re.compile(r'''(?x)
3747         (?P<key>[a-z_]+)
3748         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3749         (?:
3750             (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3751             (?P<strval>.+?)
3752         )
3753         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3754     m = operator_rex.fullmatch(filter_part.strip())
3755     if m:
3756         m = m.groupdict()
3757         unnegated_op = COMPARISON_OPERATORS[m['op']]
3758         if m['negation']:
3759             op = lambda attr, value: not unnegated_op(attr, value)
3760         else:
3761             op = unnegated_op
3762         comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3763         if m['quote']:
3764             comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3765         actual_value = dct.get(m['key'])
3766         numeric_comparison = None
3767         if isinstance(actual_value, (int, float)):
3768             # If the original field is a string and matching comparisonvalue is
3769             # a number we should respect the origin of the original field
3770             # and process comparison value as a string (see
3771             # https://github.com/ytdl-org/youtube-dl/issues/11082)
3772             try:
3773                 numeric_comparison = int(comparison_value)
3774             except ValueError:
3775                 numeric_comparison = parse_filesize(comparison_value)
3776                 if numeric_comparison is None:
3777                     numeric_comparison = parse_filesize(f'{comparison_value}B')
3778                 if numeric_comparison is None:
3779                     numeric_comparison = parse_duration(comparison_value)
3780         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3781             raise ValueError('Operator %s only supports string values!' % m['op'])
3782         if actual_value is None:
3783             return is_incomplete(m['key']) or m['none_inclusive']
3784         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3785
3786     UNARY_OPERATORS = {
3787         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3788         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3789     }
3790     operator_rex = re.compile(r'''(?x)
3791         (?P<op>%s)\s*(?P<key>[a-z_]+)
3792         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3793     m = operator_rex.fullmatch(filter_part.strip())
3794     if m:
3795         op = UNARY_OPERATORS[m.group('op')]
3796         actual_value = dct.get(m.group('key'))
3797         if is_incomplete(m.group('key')) and actual_value is None:
3798             return True
3799         return op(actual_value)
3800
3801     raise ValueError('Invalid filter part %r' % filter_part)
3802
3803
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.

    The filter is split into parts at every "&" that is not preceded by a
    backslash; all parts have to pass. An escaped ampersand is passed on to
    the part as a plain "&".
    @returns           Whether the filter passes
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct, incomplete):
            return False
    return True
3814
3815
def match_filter_func(filters):
    """ Build a match-filter callable from one or more filter strings

    @param filters  A filter string or a collection of them. An entry passes if
                    any one of the filters passes. The special filter "-"
                    switches on interactive mode instead of being matched
    @returns        None if no filters are given. Otherwise a function
                    (info_dict, incomplete=False) returning None if the entry
                    passes (NO_DEFAULT in interactive mode, unless `incomplete`
                    is truthy), or a message explaining why it is skipped
    """
    if not filters:
        return None
    filters = set(variadic(filters))

    interactive = '-' in filters
    filters.discard('-')

    def _match_func(info_dict, incomplete=False):
        passed = not filters or any(match_str(f, info_dict, incomplete) for f in filters)
        if not passed:
            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
        if interactive and not incomplete:
            return NO_DEFAULT
        return None
    return _match_func
3833
3834
class download_range_func:
    """ Callable that yields the sections of a video which should be downloaded

    @param chapters  Collection of regexes matched against chapter titles, or None
    @param ranges    Collection of (start, end) pairs, or None
    """

    # Defining __eq__ without __hash__ already makes instances unhashable;
    # this only states it explicitly
    __hash__ = None

    def __init__(self, chapters, ranges):
        self.chapters, self.ranges = chapters, ranges

    def __call__(self, info_dict, ydl):
        """ Yield one dict per section to download

        Yields a single empty dict if neither chapters nor ranges are set.
        Otherwise yields every chapter of info_dict whose title matches one of
        the regexes (with its position added as "index"), followed by a
        {'start_time', 'end_time'} dict for each range. If chapter regexes
        are set but nothing matched, a notice is printed with ydl.to_screen
        """
        if not self.ranges and not self.chapters:
            yield {}

        warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
                   else 'Cannot match chapters since chapter information is unavailable')
        for regex in self.chapters or []:
            for i, chapter in enumerate(info_dict.get('chapters') or []):
                if re.search(regex, chapter['title']):
                    warning = None
                    yield {**chapter, 'index': i}
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])

    def __eq__(self, other):
        return (isinstance(other, download_range_func)
                and self.chapters == other.chapters and self.ranges == other.ranges)

    def __repr__(self):
        # Makes the object readable when it shows up in printed parameters
        return f'{type(self).__name__}({self.chapters!r}, {self.ranges!r})'
3858
3859
def parse_dfxp_time_expr(time_expr):
    """ Parse a DFXP/TTML time expression into seconds

    Understands a plain number with an optional "s" suffix as well as clock
    values of the form "H:MM:SS", "H:MM:SS.fraction" and "H:MM:SS:fraction".
    @returns  The time in seconds, or None if the expression is empty or not understood
    """
    if not time_expr:
        return

    offset = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        # A ":" before the fraction is treated like a decimal point
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
3871
3872
def srt_subtitles_timecode(seconds):
    """ Format a time given in seconds as an SRT timecode (HH:MM:SS,mmm) """
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
3875
3876
def ass_subtitles_timecode(seconds):
    """ Format a time given in seconds as an ASS timecode (H:MM:SS.cc)

    The last field holds hundredths of a second; "%d" truncates the fraction.
    """
    timetuple = timetuple_from_msec(seconds * 1000)
    centiseconds = timetuple.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*timetuple[:-1], centiseconds)
3880
3881
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT

    Styling given through the attributes listed in SUPPORTED_STYLING (directly,
    through referenced <style> elements or through the style of <body>/<div>)
    is rendered with <b>, <i>, <u> and <font> tags.
    Paragraphs without a begin time, or with neither an end time nor a
    duration, are skipped; the numbering of the remaining ones keeps the gaps.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the data does not contain any <p> element
    '''
    # (current namespace, [legacy namespaces]) pairs; the legacy ones are
    # replaced in the raw data before it is parsed
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only styling attributes that are read; everything else is ignored
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Expands the prefixes xml:, ttml: and tts: in a path or attribute name
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled from the <style> elements below
    styles = {}
    # Style taken from <body>/<div>; applied to every element
    default_style = {}

    class TTMLPElementParser:
        # Parser target that turns the content of one <p> element into SRT text.
        # _out is rebound on each instance by "+=". The two lists below are class
        # attributes that are only mutated in place, so they are shared by all
        # the parser instances created during one dfxp2srt call
        _out = ''
        # Stack with, per open element, the list of tags that were opened for it
        _unclosed_elements = []
        # Stack of the effective styles of the enclosing styled elements
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest first: default style, referenced style,
                # attributes set directly on the element
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect with the same value
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        # All font attributes go into a single <font> tag
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                # NOTE(review): a style is pushed in start() for every styled
                # element, but only popped here if the element opened a tag
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and feed it to a parser using the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    # Fall back to <p> elements without a namespace
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Collect the <style> elements. A style may refer to a parent style that is
    # only defined further down, so the pass is repeated for as long as a style
    # had to be postponed because its parent was not known yet
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                # Start from a copy of the parent's properties
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # The style of the first <body> and of the first <div> becomes the default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        # A missing (or zero) end time is computed from the duration
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4044
4045
def cli_option(params, command_option, param, separator=None):
    """ Build the command line arguments for an option that takes a value

    @param params          Dict the value is looked up in
    @param command_option  Name of the option on the command line
    @param param           Key of the value in `params`
    @param separator       If given, option and value are joined with it into
                           a single argument instead of being passed as two
    @returns               A list of arguments; empty if the value is None or missing
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
4051
4052
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the command line arguments for an option that takes a boolean value

    The value of `param` in `params` has to be True, False or None (or missing).
    It is translated to `true_value`/`false_value` and passed on to cli_option;
    None results in no arguments.
    """
    flag = params.get(param)
    assert flag in (True, False, None)
    rendered_values = {True: true_value, False: false_value}
    return cli_option(rendered_values, command_option, flag, separator)
4057
4058
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] if params[param] equals `expected_value`, else [] """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4061
4062
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """ Look up extra command line arguments in `argdict`

    @param argdict     Dict of (lower-case) key -> list of arguments. For
                       backward compatibility a plain list/tuple of arguments is
                       accepted too; it is returned as-is if `use_compat` is
                       set and ignored otherwise. None yields `default`
    @param keys        List/tuple of keys, tried in order. An entry may itself
                       be a collection of keys, whose argument lists are concatenated
    @param default     Returned (the same object) when nothing is found
    @returns           The arguments of the first entry of `keys` with a match
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        found = [
            args for args in (argdict.get(key.lower()) for key in variadic(key_list))
            if args is not None]
        if found:
            return list(itertools.chain.from_iterable(found))
    return default
4081
4082
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """ Work out the lookup keys for `main_key`/`exe` and fetch the arguments

    The root key is `exe` if it equals `main_key` (both compared in lower
    case), else "main_key+exe". Every entry of `keys` (default: just '') is
    appended to the root key. Only if the bare root key is among the results,
    the fallbacks (main_key, exe) and 'default' are added as well; otherwise
    the backward compatible handling of list arguments is switched off.
    The lookup itself is done by cli_configuration_args.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    same_name = main_key == exe
    root_key = exe if same_name else f'{main_key}+{exe}'

    lookup_keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in lookup_keys:
        use_compat = False
    else:
        if not same_name:
            lookup_keys.append((main_key, exe))
        lookup_keys.append('default')
    return cli_configuration_args(argdict, lookup_keys, default, use_compat)
4094
4095
class ISO639Utils:
    """Conversions between two-letter (ISO 639-1) and three-letter (ISO 639-2/T) language codes."""

    # Maps two-letter codes to three-letter codes. A few withdrawn two-letter
    # codes ('iw', 'in', 'ji') are kept as aliases, listed after the current one.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of `code` are looked up, so a longer
        tag such as 'en-US' resolves like 'en'. The lookup is case-sensitive.
        Returns None for an unknown code.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        When several two-letter codes share the same three-letter code, the
        one listed first in the table is returned. Returns None if nothing matches.
        """
        candidates = (
            two_letter
            for two_letter, three_letter in cls._lang_map.items()
            if three_letter == code)
        return next(candidates, None)
4299
4300
class ISO3166Utils:
    """Lookup of country names by their two-letter country code."""

    # Maps upper-case two-letter country codes to country names.
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the corresponding full name

        The lookup is case-insensitive. Returns None for a code that is not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
4562
4563
class GeoUtils:
    """Helpers for picking an IPv4 address that belongs to a given country or CIDR block."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random dotted-quad IPv4 address from a country or CIDR block

        @param code_or_block    either a two-character country code (looked up
                                case-insensitively in the table above) or a
                                CIDR block such as '192.168.0.0/24'
        @returns                the address as a string, or None when a
                                two-character code has no block in the table
        """
        block = code_or_block
        # Any two-character argument is treated as a country code
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None

        network, prefix_len = block.split('/')
        (lowest,) = struct.unpack('!L', socket.inet_aton(network))
        # Setting every host bit yields the highest address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        chosen = random.randint(lowest, highest)
        return socket.inet_ntoa(struct.pack('!L', chosen))
4822
4823
class PerRequestProxyHandler(urllib.request.ProxyHandler):
    """ProxyHandler that lets a single request pick its own proxy

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to use
    for that request only; the header is consumed here and takes precedence
    over the proxy configured for the scheme.
    """

    def __init__(self, proxies=None):
        def default_opener(scheme):
            # Same signature as the openers the base class builds, but
            # pre-bound to the '__noproxy__' marker
            def opener(r, proxy='__noproxy__', type=scheme, meth=self.proxy_open):
                return meth(r, proxy, type)
            return opener

        # Set default handlers (before the base initializer runs)
        for scheme in ('http', 'https'):
            setattr(self, f'{scheme}_open', default_opener(scheme))
        super().__init__(proxies)

    def proxy_open(self, req, proxy, type):
        """Resolve the proxy for `req`; returns None unless the base class handles it"""
        per_request = req.headers.get('Ytdl-request-proxy')
        if per_request is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = per_request

        if proxy == '__noproxy__':
            return None  # No Proxy

        scheme = urllib.parse.urlparse(proxy).scheme.lower()
        if scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only record the proxy on the request: yt-dlp's http/https
            # handlers are the ones wrapping the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        return super().proxy_open(req, proxy, type)
4847
4848
4849 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4850 # released into Public Domain
4851 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4852
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Zero and negative values are encoded as a single zero byte.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    value = int(n)
    if value > 0:
        # shortest big-endian representation, i.e. without leading zero bytes
        encoded = value.to_bytes((value.bit_length() + 7) // 8, 'big')
    else:
        encoded = b'\000'

    if blocksize > 0:
        overhang = len(encoded) % blocksize
        if overhang:
            encoded = b'\000' * (blocksize - overhang) + encoded
    return encoded
4881
4882
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes up to a whole number of 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s

    total = 0
    for word in struct.unpack('>%dI' % (len(s) // 4), s):
        total = (total << 32) + word
    return total
4898
4899
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The data is interpreted as a little-endian integer (its bytes are
    reversed before conversion) and raised to `exponent` modulo `modulus`.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return format(ciphertext, 'x')
4915
4916
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme (v1.5 encryption padding, block type 2)

    The result has the layout [0, 2] + padding + [0] + data, where the
    padding consists of at least 8 random *nonzero* octets.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for the 11 octets
                               of overhead (3 framing + 8 padding)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding octets must never be 0: the first zero octet after the
    # [0, 2] header marks the start of the data, so a zero inside the padding
    # would make the receiver split the message at the wrong place.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
4930
4931
def _base_n_table(n, table):
    """Return the digit alphabet for base `n`

    Uses `table` when given, else the 62 characters 0-9, a-z, A-Z, and
    truncates it to the first `n` symbols (the whole table when `n` is falsy).
    Raises ValueError when neither argument is given or when `n` is larger
    than the number of available symbols.
    """
    if not (table or n):
        raise ValueError('Either table or n must be specified')

    alphabet = table if table else '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    alphabet = alphabet[:n]

    if n and len(alphabet) != n:
        raise ValueError(f'base {n} exceeds table length {len(alphabet)}')
    return alphabet
4940
4941
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string

    @param num      non-negative integer to convert
    @param n        the base; if omitted, the length of `table` is used
    @param table    digit alphabet, least-valued symbol first (see _base_n_table
                    for the default)
    @returns        the most-significant-digit-first representation of num
    @raises ValueError  if num is negative, or if a non-zero num is to be
                        written with an alphabet of fewer than two symbols
    """
    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    # Floor division never brings a negative number to 0 (-1 // base == -1),
    # and dividing by 1 never shrinks num, so both cases would loop forever.
    if num < 0:
        raise ValueError('Cannot encode a negative number')
    if base < 2:
        raise ValueError('Cannot encode a non-zero number with fewer than 2 symbols')

    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    # Digits were produced least-significant first
    return ''.join(reversed(digits))
4953
4954
def decode_base_n(string, n=None, table=None):
    """Convert given base-n string to int

    Digits are read most-significant first. A character that is not part of
    the alphabet raises KeyError.
    """
    alphabet = _base_n_table(n, table)
    digit_values = dict(zip(alphabet, range(len(alphabet))))
    radix = len(digit_values)

    number = 0
    for symbol in string:
        number = number * radix + digit_values[symbol]
    return number
4962
4963
def decode_base(value, digits):
    """Deprecated alias of decode_base_n(value, table=digits); emits a deprecation warning"""
    message = (
        f'{__name__}.decode_base is deprecated and may be removed '
        f'in a future version. Use {__name__}.decode_base_n instead')
    deprecation_warning(message)
    return decode_base_n(value, table=digits)
4968
4969
def decode_packed_codes(code):
    """Expand packed code: replace each word of the payload by its symbol

    The four groups captured by PACKED_CODES_RE are the obfuscated payload,
    the numeric base, the symbol count and the '|'-separated symbol list.
    Every index below the count is written in that base; a word of the payload
    equal to that token is replaced by the symbol at that index, or left
    unchanged when that symbol is empty.
    """
    match = re.search(PACKED_CODES_RE, code)
    payload, radix, remaining, words = match.groups()
    radix, remaining = int(radix), int(remaining)
    words = words.split('|')

    replacements = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        # An empty symbol means the token stands for itself
        replacements[token] = words[remaining] or token

    def substitute(word):
        return replacements[word.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, payload)
4986
4987
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift` positions

    The shift wraps around the end of the alphabet; characters outside the
    alphabet are passed through unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(rotate, s))
4995
4996
def rot47(s):
    """Apply a caesar shift of 47 over the 94 characters from '!' to '~'"""
    alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, alphabet, 47)
4999
5000
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict

    Values may be double-quoted (and may then contain commas); the
    surrounding quotes are removed. If a key occurs more than once, the last
    occurrence wins.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: raw[1:-1] if raw.startswith('"') else raw
        for name, raw in pairs
    }
5008
5009
def urshift(val, n):
    """Unsigned right shift: a negative val is first mapped to val + 2**32"""
    if val < 0:
        val += 0x100000000
    return val >> n
5012
5013
5014 # Based on png2str() written by @gdkchan and improved by @yokrysty
5015 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG data into rows of unfiltered bytes

    Every scanline is read as `width * 3` bytes and the filter predictors look
    3 bytes back, i.e. the image data is handled as 3 bytes per pixel.
    Chunk CRCs are skipped without being verified.

    @param png_data     the complete PNG file contents (bytes)
    @returns            (width, height, pixels), where pixels is a list of
                        `height` rows, each a flat list of `width * 3` ints
    @raises OSError     if the signature or leading IHDR chunk is missing,
                        or if there is no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, body = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or body[4:8] != b'IHDR':
        raise OSError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        return struct.unpack(formats[len(raw)], raw)[0]

    def paeth_predictor(a, b, c):
        # Pick whichever of left (a), up (b), upper-left (c) is closest to
        # a + b - c, preferring a, then b, on ties
        estimate = a + b - c
        dist_a, dist_b, dist_c = abs(estimate - a), abs(estimate - b), abs(estimate - c)
        if dist_a <= dist_b and dist_a <= dist_c:
            return a
        if dist_b <= dist_c:
            return b
        return c

    # Walk the chunk list: 4-byte length, 4-byte type, data, 4-byte CRC
    chunks = []
    offset, end = 0, len(body)
    while offset < end:
        size = read_uint(body[offset:offset + 4])
        chunks.append({
            'type': body[offset + 4:offset + 8],
            'length': size,
            'data': body[offset + 8:offset + 8 + size],
        })
        offset += 12 + size

    ihdr = chunks[0]['data']
    width, height = read_uint(ihdr[:4]), read_uint(ihdr[4:8])

    idat = b''.join(chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')
    if not idat:
        raise OSError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Each scanline is prefixed with one filter-type byte
        line_start = y * (stride + 1)
        filter_type = raw[line_start]

        above = pixels[y - 1] if y > 0 else None
        row = []
        pixels.append(row)

        for x in range(stride):
            value = raw[line_start + 1 + x]
            left = row[x - 3] if x > 2 else 0
            up = above[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = above[x - 3] if x > 2 and y > 0 else 0
                value = (value + paeth_predictor(left, up, upper_left)) & 0xff

            row.append(value)

    return width, height, pixels
5119
5120
def write_xattr(path, key, value):
    """
    Store `value` (bytes) as the extended attribute `key` of the file at `path`.

    Strategies, tried in order:
        1. Windows: write to the NTFS Alternate Data Stream `path:key`
        2. The python `pyxattr`/`xattr` module, if available
        3. The `setfattr` or `xattr` executable

    @raises XAttrMetadataError      If writing the attribute fails
    @raises XAttrUnavailableError   If no python module or executable is available
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as stream:
                stream.write(value)
        except OSError as err:
            raise XAttrMetadataError(err.errno, err.strerror)
        return

    # UNIX Method 1. Use xattrs/pyxattrs modules
    if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        handles_unicode = version_tuple(xattr.__version__) >= (0, 5, 0)
        setxattr = xattr.set if handles_unicode else None
    elif xattr:
        setxattr = xattr.setxattr
    else:
        setxattr = None

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as err:
            raise XAttrMetadataError(err.errno, err.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    if check_executable('setfattr', ['--version']):
        exe = 'setfattr'
    elif check_executable('xattr', ['-h']):
        exe = 'xattr'
    else:
        exe = None

    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The executables take the value as a text argument
    value = value.decode()
    if exe == 'xattr':
        command = [exe, '-w', key, value, path]
    else:
        command = [exe, '-n', key, '-v', value, path]

    try:
        _, stderr, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as err:
        raise XAttrMetadataError(err.errno, err.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
5170
5171
def random_birthday(year_field, month_field, day_field):
    """
    Pick a uniformly random date between 1950-01-01 and 1995-12-31 (both inclusive)

    @returns    A dict mapping the three given field names to the
                year, month and day of that date as (unpadded) strings
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_in_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_in_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, date_parts)))
5182
5183
# Templates for internet shortcut files, which are plain text files.
# Each template is a %-format string; the placeholders it uses are visible
# in the template itself (`url` everywhere, `filename` for `.desktop` only).

# `.url` shortcut, in INI format
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# `.webloc` shortcut, as an Apple property list (XML).
# NOTE(review): the URL is interpolated verbatim - presumably callers
# XML-escape it beforehand; confirm at the call site.
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# `.desktop` shortcut (desktop entry of type `Link`)
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Lookup table: link type -> template
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
5215
5216
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    IRIs without a network location (e.g. relative references) are supported; only their path, params, query and fragment are escaped.

    Source for the `safe` arguments used below: https://url.spec.whatwg.org/#percent-encoded-bytes.

    @param iri      The IRI to convert (`str`)
    @returns        The equivalent ASCII-only URI (`str`)
    @raises ValueError  If the network location contains an IPv6 literal, or cannot be parsed
    """

    iri_parts = urllib.parse.urlparse(iri)

    if '[' in iri_parts.netloc:
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no network location (relative references, `mailto:` etc.)
    if iri_parts.hostname:
        # Punycode for Unicode hostnames. The 'idna' encoding produces ASCII text.
        net_location += iri_parts.hostname.encode('idna').decode()

    # Port 80 is only implied by plain HTTP. Dropping it for any other scheme
    # (e.g. `https://host:80/`) would make the URI point to a different port
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return urllib.parse.urlunparse(
        (iri_parts.scheme,
            net_location,

            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5259
5260
def to_high_limit_path(path):
    """
    Return `path` in a form that is not subject to the MAX_PATH limitation on Windows

    On `win32`/`cygwin` the path is made absolute and prefixed with `\\\\?\\`;
    on every other platform it is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    absolute = os.path.abspath(path)
    return '\\\\?\\' + absolute
5267
5268
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """
    Look up `field` in `obj` and format it using `template`

    @param field        Path(s) passed on to `traverse_obj`
    @param template     %-format string applied to the (transformed) value
    @param ignore       Value(s) for which `default` is returned instead.
                        If not given, every falsy value except 0 is ignored
    @param default      Returned when the value is ignored
    @param func         Transformation applied to the value before formatting
    """
    value = traverse_obj(obj, *variadic(field))

    if ignore is NO_DEFAULT:
        is_ignored = not value and value != 0
    else:
        is_ignored = value in variadic(ignore)

    if is_ignored:
        return default
    return template % func(value)
5274
5275
def clean_podcast_url(url):
    """Remove the known podcast tracking/analytics prefixes from `url`"""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    # Prefixes may be chained; every occurrence is removed
    return re.sub(tracking_prefix_re, '', url)
5291
5292
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Generate a random (version 4) UUID

    In the template, `x` stands for any random hex digit, `4` is the version
    and `y` is the variant digit, which must be one of `8`, `9`, `a` or `b`
    (the two most significant bits of that digit are fixed to `10`).

    @returns    The UUID as a lowercase string in 8-4-4-4-12 form
    """
    def _random_digit(mobj):
        value = random.randint(0, 15)
        if mobj.group() == 'y':
            # Keep the two low bits random and force the high bits to `10`
            value = value & 0x3 | 0x8
        return _HEX_TABLE[value]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5298
5299
def make_dir(path, to_screen=None):
    """
    Create the parent directory of `path` (including intermediate ones) if needed

    @param path         Path of a file whose containing directory should exist
    @param to_screen    Optional callable used to report a failure.
                        Anything that is not callable (e.g. `None`) is ignored
    @returns            `True` on success (or if there is nothing to create),
                        `False` if the directory could not be created
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            # exist_ok: the directory may get created by someone else after the check above
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5310
5311
def get_executable_path():
    """Return the absolute path of the directory containing the running executable"""
    # Imported here rather than at module level
    # NOTE(review): presumably to avoid a circular import with `.update` - confirm
    from .update import _get_variant_and_executable_path

    executable = _get_variant_and_executable_path()[1]
    return os.path.dirname(os.path.abspath(executable))
5316
5317
def load_plugins(name, suffix, namespace):
    """
    Load plugin classes from `ytdlp_plugins/<name>/__init__.py` next to the executable

    @param name         Name of the plugin package. Also used as the module name
    @param suffix       Only attributes whose name ends with this are loaded
    @param namespace    Dict that the loaded classes are added to.
                        Names already present in it are never overwritten
    @returns            Dict of the newly loaded `{name: class}`.
                        Empty if the plugin file does not exist
    """
    classes = {}
    with contextlib.suppress(FileNotFoundError):
        init_file = os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')
        plugins_spec = importlib.util.spec_from_file_location(name, init_file)
        plugins = importlib.util.module_from_spec(plugins_spec)
        # Register the module before executing it
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)

        for attr_name in dir(plugins):
            if attr_name in namespace or not attr_name.endswith(suffix):
                continue
            klass = getattr(plugins, attr_name)
            classes[attr_name] = namespace[attr_name] = klass
    return classes
5334
5335
def traverse_obj(
        obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    """
    Safely traverse nested `dict`s and `Sequence`s

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    "value"

    Each of the provided `paths` is tested and the first producing a valid result will be returned.
    The next path will also be tested if the path branched but no results could be found.
    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
    A value of None is treated as the absence of a value.

    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.

    The keys in the path can be one of:
        - `None`:           Return the current object.
        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
        - `slice`:          Branch out and return all values in `obj[key]`.
        - `Ellipsis`:       Branch out and return a list of all values.
        - `tuple`/`list`:   Branch out and return a list of all matching values.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch out and return values filtered by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Sequence`s, `key` is the index of the value.
        - `dict`            Transform the current object and return a matching dict.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

        `tuple`, `list`, and `dict` all support nested paths and branches.

    @params paths           Paths which to traverse by.
    @param default          Value to return if the paths do not match.
    @param expected_type    If a `type`, only accept final values of this type.
                            If any other callable, try to call the function on each result.
    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
    @param casesense        If `False`, consider string dictionary keys as case insensitive.

    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API

    @param is_user_input    Whether the keys are generated from user input.
                            If `True` strings get converted to `int`/`slice` if needed.
    @param traverse_string  Whether to traverse into objects as strings.
                            If `True`, any non-compatible object will first be
                            converted into a string and then traversed into.


    @returns                The result of the object traversal.
                            If successful, `get_all=True`, and the path branches at least once,
                            then a list of results is returned instead.
                            A list is always returned if the last path branches and no `default` is given.
    """
    # `str` and `bytes` are deliberately not treated as sequences
    is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
    casefold = lambda k: k.casefold() if isinstance(k, str) else k

    # `type_test` maps every result to itself (or a converted value), or to None to discard it
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    else:
        # NOTE(review): presumably `try_call` returns None when the call fails - confirm
        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))

    def apply_key(key, obj):
        # Generator yielding the value(s) obtained by applying a single `key` to `obj`.
        # Yields nothing if the key does not apply to the object
        if obj is None:
            return

        elif key is None:
            yield obj

        elif isinstance(key, (list, tuple)):
            # Every branch is a full (sub-)path applied to the same object
            for branch in key:
                _, result = apply_path(obj, branch)
                yield from result

        elif key is ...:
            if isinstance(obj, collections.abc.Mapping):
                yield from obj.values()
            elif is_sequence(obj):
                yield from obj
            elif isinstance(obj, re.Match):
                yield from obj.groups()
            elif traverse_string:
                yield from str(obj)

        elif callable(key):
            # Build (key, value) pairs for the filter function
            if is_sequence(obj):
                iter_obj = enumerate(obj)
            elif isinstance(obj, collections.abc.Mapping):
                iter_obj = obj.items()
            elif isinstance(obj, re.Match):
                # Index 0 is the whole match, followed by the groups
                iter_obj = enumerate((obj.group(), *obj.groups()))
            elif traverse_string:
                iter_obj = enumerate(str(obj))
            else:
                return
            yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))

        elif isinstance(key, dict):
            # Each dict value is itself a path. Keys without a result are dropped
            # unless a `default` was given, in which case they are set to it
            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
            yield {k: v if v is not None else default for k, v in iter_obj
                   if v is not None or default is not NO_DEFAULT}

        elif isinstance(obj, collections.abc.Mapping):
            # In case insensitive mode `key` was already casefolded by `apply_path`.
            # An exact match is preferred over a casefolded one
            yield (obj.get(key) if casesense or (key in obj)
                   else next((v for k, v in obj.items() if casefold(k) == key), None))

        elif isinstance(obj, re.Match):
            if isinstance(key, int) or casesense:
                # If the group does not exist, the IndexError is suppressed
                # and execution continues below the `with` block
                with contextlib.suppress(IndexError):
                    yield obj.group(key)
                    return

            if not isinstance(key, str):
                return

            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)

        else:
            if is_user_input:
                # User input is always a string: convert it to an index or a slice
                key = (int_or_none(key) if ':' not in key
                       else slice(*map(int_or_none, key.split(':'))))

            if not isinstance(key, (int, slice)):
                return

            if not is_sequence(obj):
                if not traverse_string:
                    return
                obj = str(obj)

            with contextlib.suppress(IndexError):
                yield obj[key]

    def apply_path(start_obj, path):
        # Apply the keys of `path` one after another. The result is a lazy iterable
        # of all objects reached, and whether any of the keys could have branched
        objs = (start_obj,)
        has_branched = False

        for key in variadic(path):
            if is_user_input and key == ':':
                key = ...

            if not casesense and isinstance(key, str):
                key = key.casefold()

            if key is ... or isinstance(key, (list, tuple)) or callable(key):
                has_branched = True

            # `functools.partial` binds the current `key`, since `objs` is only evaluated later
            key_func = functools.partial(apply_key, key)
            objs = itertools.chain.from_iterable(map(key_func, objs))

        return has_branched, objs

    def _traverse_obj(obj, path, use_list=True):
        has_branched, results = apply_path(obj, path)
        # Discard the results rejected by `type_test`
        results = LazyList(x for x in map(type_test, results) if x is not None)

        if get_all and has_branched:
            # An empty list is only a valid result if `use_list` is set
            return results.exhaust() if results or use_list else None

        return results[0] if results else None

    for index, path in enumerate(paths, 1):
        # Only the last path may return an empty list, and only if there is no `default`
        use_list = default is NO_DEFAULT and index == len(paths)
        result = _traverse_obj(obj, path, use_list)
        if result is not None:
            return result

    return None if default is NO_DEFAULT else default
5503
5504
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated wrapper around `traverse_obj`, treating `keys` as user input and traversing into strings"""
    message = (
        f'"{__name__}.traverse_dict" is deprecated and may be removed '
        f'in a future version. Use "{__name__}.traverse_obj" instead')
    deprecation_warning(message)

    return traverse_obj(
        dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5509
5510
def get_first(obj, keys, **kwargs):
    """
    Return the first result of traversing every item of `obj` by `keys`

    Any further keyword arguments are passed on to `traverse_obj`,
    except for `get_all`, which is always `False` and must not be given.
    """
    branching_path = (..., *variadic(keys))
    return traverse_obj(obj, branching_path, **kwargs, get_all=False)
5513
5514
def time_seconds(**kwargs):
    """
    Return the timestamp of the current time, taken in a fixed-offset timezone

    @param kwargs   Passed to `datetime.timedelta` to build the UTC offset of the timezone
    @returns        The timestamp as returned by `datetime.datetime.timestamp` (`float`)
    """
    utc_offset = datetime.timedelta(**kwargs)
    tz = datetime.timezone(utc_offset)
    return datetime.datetime.now(tz).timestamp()
5518
5519
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """
    Create a JSON Web Token signed with HMAC-SHA256

    @param payload_data     JSON-serializable payload (the claims)
    @param key              Secret used for signing (`str`)
    @param headers          Optional dict of additional/overriding header fields
    @returns                `bytes` of the form `header.payload.signature`
    """
    def _b64url(data):
        # JWS requires the URL-safe base64 alphabet with the trailing padding removed
        return base64.urlsafe_b64encode(data).rstrip(b'=')

    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)

    header_b64 = _b64url(json.dumps(header_data).encode())
    payload_b64 = _b64url(json.dumps(payload_data).encode())
    # The signature covers exactly the encoded header and payload as they appear in the token
    signing_input = header_b64 + b'.' + payload_b64
    h = hmac.new(key.encode(), signing_input, hashlib.sha256)
    signature_b64 = _b64url(h.digest())
    return signing_input + b'.' + signature_b64
5537
5538
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """
    Extract the payload of a JSON Web Token. The signature is NOT verified

    @param jwt      The token (`str`) consisting of exactly three `.`-separated parts
    @returns        The JSON-decoded payload
    """
    _header, encoded_payload, _signature = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    padded_payload = f'{encoded_payload}==='
    return json.loads(base64.urlsafe_b64decode(padded_payload))
5545
5546
# Whether terminal (VT) sequences are enabled on Windows.
# Starts out as False on Windows and is set to True by `windows_enable_vt_mode`.
# Always None on any other OS
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5548
5549
@functools.cache
def supports_terminal_sequences(stream):
    """
    Whether terminal sequences can be written to `stream`

    On Windows this requires `WINDOWS_VT_MODE` to be set, on any other OS
    a non-empty `TERM` environment variable. In addition, `stream` must be a tty.
    The result is cached for each stream.
    """
    is_windows = compat_os_name == 'nt'
    if is_windows and not WINDOWS_VT_MODE:
        return False
    if not is_windows and not os.getenv('TERM'):
        return False

    try:
        return stream.isatty()
    except BaseException:
        # A stream that cannot even be queried is treated as not being a terminal
        return False
5561
5562
def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
    """
    Try to enable terminal sequences on Windows and record the outcome in `WINDOWS_VT_MODE`

    Does nothing on Windows versions older than 10.0.10586, or if running the shell fails.
    """
    global WINDOWS_VT_MODE

    if get_windows_version() < (10, 0, 10586):
        return

    try:
        # NOTE(review): running an empty shell command presumably has the
        # side effect of enabling VT mode for the console (see TODO above) - confirm
        Popen.run('', shell=True)
    except Exception:
        return
    else:
        WINDOWS_VT_MODE = True
        # Cached answers were computed while VT mode was still disabled
        supports_terminal_sequences.cache_clear()
5574
5575
# An escape character and `[`, followed by everything up to and including the next `m`
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with all sequences matching `_terminal_sequences_re` removed"""
    return re.sub(_terminal_sequences_re, '', string)
5581
5582
def number_of_digits(number):
    """
    Return the length of `number` when formatted as an integer using `%d`

    Note that for negative numbers this includes the sign.
    """
    formatted = '%d' % number
    return len(formatted)
5585
5586
def join_nonempty(*values, delim='-', from_dict=None):
    """
    Join the string form of all truthy `values` using `delim`

    @param from_dict    If given, each value is instead a path that is
                        looked up in this object using `traverse_obj`
    """
    if from_dict is not None:
        values = (traverse_obj(from_dict, variadic(v)) for v in values)
    return delim.join(str(value) for value in values if value)
5591
5592
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    This function is useful with video services that scale the provided thumbnails on demand

    @returns    A new list of thumbnail dicts. If no format has a (non-zero) width,
                `thumbnails` itself is returned unchanged
    """
    _keys = ('width', 'height')

    # Missing or None dimensions count as 0. The tuples are compared by width first
    all_dimensions = [tuple(fmt.get(key) or 0 for key in _keys) for fmt in formats]
    max_dimensions = max(all_dimensions, default=(0, 0))
    max_width = max_dimensions[0]
    if not max_width:
        return thumbnails

    scaled = []
    for thumbnail in thumbnails:
        scaled_url = re.sub(url_width_re, str(max_width), thumbnail['url'])
        scaled.append(merge_dicts(
            {'url': scaled_url}, dict(zip(_keys, max_dimensions)), thumbnail))
    return scaled
5613
5614
def parse_http_range(range):
    """
    Parse value of "Range" or "Content-Range" HTTP header into tuple.

    @returns    `(start, end, total)`. `end` and `total` are None if not present.
                All three are None if the value is empty or not a byte range
    """
    if not range:
        return None, None, None

    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if crg is None:
        return None, None, None

    start, end, total = crg.groups()
    return int(start), int_or_none(end), int_or_none(total)
5623
5624
def read_stdin(what):
    """
    Announce that `what` is being read from STDIN, including how to signal EOF

    @returns    `sys.stdin` itself. Nothing is read from it here
    """
    if compat_os_name == 'nt':
        eof = 'Ctrl+Z'
    else:
        eof = 'Ctrl+D'
    write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    return sys.stdin
5629
5630
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @returns (encoding, bytes to skip)
    """

    # BOM marks are given priority over declarations
    bom_result = next(
        ((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if bom_result is not None:
        return bom_result

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    without_nulls = data.replace(b'\0', b'')
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', without_nulls)
    if declaration is None:
        return None, 0
    return declaration.group(1).decode(), 0
5647
5648
class Config:
    """
    A list of command-line arguments, together with the configs it refers to

    The arguments are parsed using `parser`. Every location given in the resulting
    `config_locations` is loaded as a nested `Config`, which may in turn refer to
    further locations. A location is never loaded more than once.
    """
    # Arguments of this config itself, as passed to `init`
    own_args = None
    # Set to `own_args` once they have been parsed by `load_configs`
    parsed_args = None
    # File that the arguments were read from, if any
    filename = None
    __initialized = False

    def __init__(self, parser, label=None):
        """
        @param parser   Parser providing `parse_known_args`, `parse_args` and `error`
        @param label    Description of the config, used by `__str__`
        """
        self.parser, self.label = parser, label
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """
        Set the arguments of this config and load all configs they refer to

        @returns    `False` if `filename` was already loaded, otherwise `True`
        """
        assert not self.__initialized
        self.own_args, self.filename = args, filename
        return self.load_configs()

    def load_configs(self):
        """
        Parse `own_args` and load each of the `config_locations` as a nested config

        Relative locations are resolved against the directory of `filename`.
        For a directory, the file `yt-dlp.conf` inside it is used.
        A location that does not exist is reported using `parser.error`.

        @returns    `False` if `filename` was already loaded, otherwise `True`
        """
        directory = ''
        if self.filename:
            location = os.path.realpath(self.filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args
        for location in opts.config_locations or []:
            if location == '-':
                # STDIN can only be read once
                if location in self._loaded_paths:
                    continue
                self._loaded_paths.add(location)
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        """Describe this config and, indented below it, all nested ones. Login info is hidden"""
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=None):
        """
        Read the arguments contained in a config file

        @param filename     Path of the config file
        @param default      Value to return if the file cannot be opened.
                            If not given, a new empty list is returned
        @returns            List of arguments, split using shell-like syntax
        @raises ValueError  If the contents cannot be decoded or split
        """
        try:
            optionf = open(filename, 'rb')
        except OSError:
            # silently skip if file is not present
            # A new list is created each time, so that callers may safely modify the result
            return [] if default is None else default

        # The file gets closed no matter how this block is left
        with optionf:
            try:
                enc, skip = determine_file_encoding(optionf.read(512))
                optionf.seek(skip, io.SEEK_SET)
            except OSError:
                enc = None  # silently skip read errors
            try:
                # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
                contents = optionf.read().decode(enc or preferredencoding())
                res = shlex.split(contents, comments=True)
            except Exception as err:
                raise ValueError(f'Unable to parse "{filename}": {err}')
        return res

    @staticmethod
    def hide_login_info(opts):
        """
        Return a copy of the list of arguments `opts` with the values of all
        login related options replaced by `PRIVATE`.
        Both the `--option=value` and the `--option value` forms are handled
        """
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            else:
                return o

        opts = list(map(_scrub_eq, opts))
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """
        Create a nested config from `args` (the positional arguments of `init`)

        The set of loaded paths is shared with the nested config, which
        is only added if it was not loaded before.
        """
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        """All arguments: those of the nested configs (last one first), followed by the own ones"""
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        """Parse `all_args` using `parser.parse_known_args`"""
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        """Parse `all_args` using `parser.parse_args`"""
        return self.parser.parse_args(self.all_args)
5756
5757
class WebSocketsWrapper:
    """
    Wraps websockets module to use in non-async scopes

    Each instance owns a private event loop, on which every operation is run to completion.
    Usable as a context manager. The connection is also closed at interpreter exit.
    """
    # The established connection, as returned by entering `self.conn`. None while not connected
    pool = None

    def __init__(self, url, headers=None, connect=True):
        """
        @param url      URL to connect to
        @param headers  Additional headers, passed to `websockets.connect` as `extra_headers`
        @param connect  Whether to connect right away instead of in `__enter__`
        """
        self.loop = asyncio.new_event_loop()
        # XXX: "loop" is deprecated
        self.conn = websockets.connect(
            url, extra_headers=headers, ping_interval=None,
            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
        if connect:
            self.__enter__()
        atexit.register(self.__exit__, None, None, None)

    def __enter__(self):
        """Connect, unless already connected"""
        if not self.pool:
            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
        return self

    def send(self, *args):
        """Run `send` of the connection to completion"""
        self.run_with_loop(self.pool.send(*args), self.loop)

    def recv(self, *args):
        """Run `recv` of the connection to completion and return its result"""
        return self.run_with_loop(self.pool.recv(*args), self.loop)

    def __exit__(self, type, value, traceback):
        """Close the connection, cancel any task left on the loop and close the loop"""
        # May be called a second time by the `atexit` handler registered in `__init__`.
        # Nothing can be (or needs to be) run on a loop that is already closed
        if self.loop.is_closed():
            return
        try:
            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
        finally:
            # Cancelling has to run the loop, and thus must happen before it is closed
            try:
                self._cancel_all_tasks(self.loop)
            finally:
                self.loop.close()

    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
    # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
    @staticmethod
    def run_with_loop(main, loop):
        """
        Run the coroutine `main` on `loop` until it completes and return its result

        @raises ValueError  If `main` is not a coroutine
        """
        if not asyncio.iscoroutine(main):
            raise ValueError(f'a coroutine was expected, got {main!r}')

        try:
            return loop.run_until_complete(main)
        finally:
            loop.run_until_complete(loop.shutdown_asyncgens())
            if hasattr(loop, 'shutdown_default_executor'):
                loop.run_until_complete(loop.shutdown_default_executor())

    @staticmethod
    def _cancel_all_tasks(loop):
        """Cancel all unfinished tasks of `loop`, wait for them and report any exception they raised"""
        to_cancel = asyncio.all_tasks(loop)

        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()

        # `gather` must not be given "loop", which was removed in python 3.10.
        # It is not needed either, as the tasks already belong to `loop`
        loop.run_until_complete(
            asyncio.gather(*to_cancel, return_exceptions=True))

        for task in to_cancel:
            if task.cancelled():
                continue
            if task.exception() is not None:
                loop.call_exception_handler({
                    'message': 'unhandled exception during asyncio.run() shutdown',
                    'exception': task.exception(),
                    'task': task,
                })
5827
5828
def merge_headers(*dicts):
    """
    Merge dicts of http headers case insensitively, prioritizing the latter ones

    @returns    A new dict, with all header names normalized using `str.title`
    """
    merged = {}
    for headers in dicts:
        for name, value in dict.items(headers):
            merged[name.title()] = value
    return merged
5832
5833
def cached_method(f):
    """
    Cache a method

    The results are stored in the `__dict__` of each instance, separately for every
    method name, and keyed by the arguments of the call after applying the defaults.
    Thus all arguments must be hashable.
    """
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        call = signature.bind(self, *args, **kwargs)
        call.apply_defaults()
        # The first argument is `self`, which is not part of the key
        _, *call_values = call.arguments.values()
        key = tuple(call_values)

        method_caches = vars(self).setdefault('__cached_method__cache', {})
        results = method_caches.setdefault(f.__name__, {})
        if key in results:
            return results[key]

        results[key] = f(self, *args, **kwargs)
        return results[key]
    return wrapper
5849
5850
class classproperty:
    """
    property access for class methods

    The decorated function is called with the class, no matter
    whether the attribute is accessed on the class or on an instance.
    """

    def __init__(self, func):
        self.func = func
        # Take over the name, docstring etc. of the decorated function
        functools.update_wrapper(self, func)

    def __get__(self, instance, owner):
        getter = self.func
        return getter(owner)
5860
5861
class Namespace(types.SimpleNamespace):
    """
    A `types.SimpleNamespace` that iterates over the values of its attributes

    Meant to be used like an immutable namespace.
    However, assigning to the attributes is not actually prevented.
    """

    def __iter__(self):
        attributes = vars(self)
        return iter(attributes.values())

    @property
    def items_(self):
        """View of the `(name, value)` pairs of all attributes"""
        attributes = vars(self)
        return attributes.items()
5871
5872
# File extensions (without the leading dot), grouped by the kind of media.
# `video` and `audio` initially hold only the less common extensions;
# the `common_*` ones are appended to them right below
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# From here on, `video` and `audio` contain all of the respective extensions
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

# All video, audio and manifest extensions. This has to be built
# after the two lines above, so that the common extensions are included
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5887
5888
class RetryManager:
    """Iterable that drives a retry loop; every iteration is one attempt

    Usage:
        for retry in RetryManager(...):
            try:
                ...
            except SomeException as err:
                retry.error = err
                continue

    Iteration ends after an attempt that did not set `retry.error`, or once
    `attempt` exceeds `retries`. After every attempt that set a truthy error,
    the error callback is called with (error, attempt, retries).
    """
    attempt = 0
    _error = None

    def __init__(self, _retries, _error_callback, **kwargs):
        self.retries = _retries or 0
        # Extra keyword arguments are forwarded to every callback invocation
        self.error_callback = functools.partial(_error_callback, **kwargs)

    def _should_retry(self):
        # NO_DEFAULT in _error means the latest attempt finished without reporting an error
        if self._error is NO_DEFAULT:
            return False
        return self.attempt <= self.retries

    @property
    def error(self):
        return None if self._error is NO_DEFAULT else self._error

    @error.setter
    def error(self, value):
        self._error = value

    def __iter__(self):
        while self._should_retry():
            # Reset before handing control to the loop body, which may set a new error
            self.error = NO_DEFAULT
            self.attempt += 1
            yield self
            failure = self.error
            if failure:
                self.error_callback(failure, self.attempt, self.retries)

    @staticmethod
    def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
        """Report a failed attempt and sleep before the next one

        When `count` exceeds `retries`, the result of `error(message)` is
        returned if `error` is given; otherwise `e` is raised.
        When `count` is 0, only `warn(e)` is called.
        Otherwise a "Retrying" warning is issued and, if `sleep_func` (a number,
        or a callable taking `n`) gives a truthy delay, that many seconds are slept.
        """
        if count > retries:
            if not error:
                raise e
            if count > 1:
                return error(f'{e}. Giving up after {count - 1} retries')
            return error(str(e))

        if not count:
            return warn(e)
        if isinstance(e, ExtractorError):
            e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
        warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')

        if callable(sleep_func):
            delay = float_or_none(sleep_func(n=count - 1))
        else:
            delay = sleep_func
        if delay:
            info(f'Sleeping {delay:.2f} seconds ...')
            time.sleep(delay)
5943
5944
def make_archive_id(ie, video_id):
    """Build the id "<lowercased ie key> <video_id>"

    `ie` is either the key itself (a str) or an object providing ie_key()
    """
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
5948
5949
def truncate_string(s, left, right=0):
    """Shorten a string by replacing its middle part with "..."

    @param s        The string to shorten. None is returned unchanged
    @param left     Length of the leading part including the 3-character
                    ellipsis; must be greater than 3
    @param right    Number of trailing characters to keep; must be >= 0
    @returns        `s` itself if it is None or at most left + right characters
                    long, otherwise a string of exactly left + right characters
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    # s[-0:] is the whole string, so an empty tail has to be handled explicitly
    tail = s[-right:] if right else ''
    return f'{s[:left - 3]}...{tail}'
5955
5956
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Resolve a sequence of option strings into an ordered set of items

    @param options      Option strings, processed in order. A leading "-"
                        removes the named item(s) instead of adding them
    @param alias_dict   Maps alias names to lists of options. Must contain
                        "all", which lists every valid item
    @param use_regex    Treat non-alias options as case-insensitive regexes
                        that must fully match items of alias_dict["all"]
    @param start        Items that are requested to begin with
    @raises ValueError  If an option is neither an alias nor (without
                        use_regex) an item of alias_dict["all"]
    """
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for val in options:
        discard = val.startswith('-')
        name = val[1:] if discard else val

        if name in alias_dict:
            expansion = alias_dict[name]
            if discard:
                # Discarding an alias inverts the sign of each option it stands for
                expansion = [
                    i[1:] if i.startswith('-') else f'-{i}' for i in expansion]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(expansion, alias_dict, start=requested)
            continue

        if use_regex:
            current = filter(re.compile(name, re.I).fullmatch, alias_dict['all'])
        elif name in alias_dict['all']:
            current = [name]
        else:
            raise ValueError(name)

        if not discard:
            requested.extend(current)
            continue
        for item in current:
            while item in requested:
                requested.remove(item)

    return orderedSet(requested)
5985
5986
# Deprecated
# Boolean flags telling whether the corresponding optional dependency
# (imported from .dependencies at the top of this module) is truthy.
# Presumably kept only for backward compatibility; check the dependency itself instead
has_certifi = bool(certifi)
has_websockets = bool(websockets)