import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import importlib.util
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
import zlib

from .compat import asyncio, functools  # isort: split
from .compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
)
from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in urllib.parse.uses_netloc:
            urllib.parse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise

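# A minimal usage sketch (hypothetical path; illustrative only): the JSON is
# first written to a temp file in the target directory, then renamed into place.
#   write_json_file({'id': 'abc', 'title': 'example'}, '/tmp/example.info.json')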

def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

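# A usage sketch of xpath_with_ns; the namespace URI below is a hypothetical
# placeholder, not one taken from this codebase:
#   >>> xpath_with_ns('media:content/media:title', {'media': 'urn:example'})
#   '{urn:example}content/{urn:example}title'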

def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, str):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]

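# A sketch of the xpath_* helpers on a tiny hand-made document (illustrative
# values, not from any extractor):
#   >>> doc = compat_etree_fromstring('<root><media url="http://e.example/v.mp4"/></root>')
#   >>> xpath_attr(doc, './media', 'url')
#   'http://e.example/v.mp4'
#   >>> xpath_text(doc, './missing', default=None) is None
#   True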

def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of all tags with the specified attribute in the passed HTML document as a list"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return an iterator over the text (content) and the html (whole) of all tags
    with the specified attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )

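# An illustrative call against an inline snippet (note this is regex-based
# matching, not a full HTML parse):
#   >>> list(get_elements_text_and_html_by_attribute('class', 'foo', '<p class="foo">bar</p>'))
#   [('bar', '<p class="foo">bar</p>')]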

class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')


class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        html.parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()

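# A behaviour sketch with an illustrative snippet: <br> becomes a newline and
# the remaining tags are stripped.
#   >>> clean_html('<b>yt-dlp</b><br/>rocks')
#   'yt-dlp\nrocks'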

class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        if self.ignore_extra:
            return self.raw_decode(s.lstrip())[0]
        return super().decode(s)


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
        result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result

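# A behaviour sketch (illustrative input): in restricted mode, path separators
# and spaces become '_' and ':' becomes '_-'.
#   >>> sanitize_filename('A/B: C', restricted=True)
#   'A_B_-_C'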

def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url

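# Examples matching the typo table above (illustrative URLs):
#   >>> sanitize_url('//cdn.example/v.mp4')
#   'http://cdn.example/v.mp4'
#   >>> sanitize_url('httpss://example.com/')
#   'https://example.com/'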

def extract_basic_auth(url):
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = urllib.parse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'

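# A sketch with illustrative credentials: the userinfo is moved out of the URL
# and returned as a ready-made Authorization header value.
#   >>> extract_basic_auth('http://user:pass@host.example/path')
#   ('http://host.example/path', 'Basic dXNlcjpwYXNz')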

def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return urllib.request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _iter():
        seen = []  # Do not use set since the items can be unhashable
        for x in iterable:
            if x not in seen:
                seen.append(x)
                yield x

    return _iter() if lazy else list(_iter())

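# A quick illustration; order of first occurrence is preserved:
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]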

def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)

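# An illustrative call; named and numeric entities are resolved via
# _htmlentity_transform:
#   >>> unescapeHTML('&amp;a&quot;&#x41;')
#   '&a"A'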

def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, text=False, **kwargs):
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, **kwargs):
        with cls(*args, **kwargs) as proc:
            stdout, stderr = proc.communicate_or_kill()
            return stdout or '', stderr or '', proc.returncode


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)

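# A worked example (345067 ms = 5 min 45 s 67 ms):
#   >>> timetuple_from_msec(345067)
#   Time(hours=0, minutes=5, seconds=45, milliseconds=67)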

def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret

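# A worked example (3723.5 s = 1 h 2 min 3.5 s):
#   >>> formatSeconds(3723.5, msec=True)
#   '1:02:03.500'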

def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        try:
            context.load_default_certs()
        # Work around the issue in load_default_certs when there are bad certificates. See:
        # https://github.com/yt-dlp/yt-dlp/issues/1060,
        # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    from .update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info

        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to work around _create_connection() from socket, which will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers

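# A behaviour sketch: the marker header suppresses Accept-Encoding and is then
# dropped itself (illustrative header dict):
#   >>> handle_youtubedl_headers({'Youtubedl-no-compression': '1', 'Accept-Encoding': 'gzip'})
#   {}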

class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = http.client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs cannot contain non-ASCII characters; however,
        # this is not always respected by websites: some tend to give out URLs with
        # non percent-encoded non-ASCII characters (see telemb.py, ard.py [#3412]).
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991),
        # so to work around the issue we replace the request's original URL with a
        # percent-encoded one.
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09),
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = urllib.request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, http.client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or http.client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by the `expires` field being set to
        # either an empty string or 0. MozillaCookieJar only recognizes the
        # former (see [1]), so we need to force the latter to be recognized
        # as session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when a user does not check the 'Remember me' check box
        # while logging in on a site, some important cookies are stored as
        # session cookies so that not recognizing them will result in a failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return urllib.request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = urllib.request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # Remove the Content-* headers, as they only make sense for the original request
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}

        # A 303 must either use GET or HEAD for subsequent request
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
        if code == 303 and m != 'HEAD':
            m = 'GET'
        # 301 and 302 redirects are commonly turned into a GET from a POST
        # for subsequent requests by browsers, so we'll do the same.
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
        if code in (301, 302) and m == 'POST':
            m = 'GET'

        return urllib.request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True, method=m)


def extract_timezone(date_str):
    m = re.search(
        r'''(?x)
            ^.{8,}?                                          # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                        # just the UTC Z, or
               (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                # preceded by 4 digits or hh:mm or
                  (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))  # not preceded by 3 alpha word or >= 4 alpha or 2 digits
               [ ]?                                          # optional space
               (?P<sign>\+|-)                                # +/-
               (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})    # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str

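# A worked example; the offset is returned as a timedelta and stripped from
# the date string (+05:30 = 19800 seconds):
#   >>> extract_timezone('2022-01-02T03:04:05+05:30')
#   (datetime.timedelta(seconds=19800), '2022-01-02T03:04:05')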
1695
1696 def parse_iso8601(date_str, delimiter='T', timezone=None):
1697 """ Return a UNIX timestamp from the given date """
1698
1699 if date_str is None:
1700 return None
1701
1702 date_str = re.sub(r'\.[0-9]+', '', date_str)
1703
1704 if timezone is None:
1705 timezone, date_str = extract_timezone(date_str)
1706
1707 with contextlib.suppress(ValueError):
1708 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1709 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1710 return calendar.timegm(dt.timetuple())
1711
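# Illustrative example (worked out by hand; 18:39:15-05:00 is 23:39:15 UTC):
#   parse_iso8601('2014-12-31T18:39:15-05:00') -> 1420069155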
1712
1713 def date_formats(day_first=True):
1714 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1715
1716
1717 def unified_strdate(date_str, day_first=True):
1718 """Return a string with the date in the format YYYYMMDD"""
1719
1720 if date_str is None:
1721 return None
1722 upload_date = None
1723 # Replace commas
1724 date_str = date_str.replace(',', ' ')
1725 # Remove AM/PM + timezone
1726 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1727 _, date_str = extract_timezone(date_str)
1728
1729 for expression in date_formats(day_first):
1730 with contextlib.suppress(ValueError):
1731 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1732 if upload_date is None:
1733 timetuple = email.utils.parsedate_tz(date_str)
1734 if timetuple:
1735 with contextlib.suppress(ValueError):
1736 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1737 if upload_date is not None:
1738 return str(upload_date)
1739
1740
1741 def unified_timestamp(date_str, day_first=True):
1742 if date_str is None:
1743 return None
1744
1745 date_str = re.sub(r'[,|]', '', date_str)
1746
1747 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1748 timezone, date_str = extract_timezone(date_str)
1749
1750 # Remove AM/PM + timezone
1751 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1752
1753 # Remove unrecognized timezones from ISO 8601 alike timestamps
1754 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1755 if m:
1756 date_str = date_str[:-len(m.group('tz'))]
1757
1758 # Python only supports microseconds, so remove nanoseconds
1759 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1760 if m:
1761 date_str = m.group(1)
1762
1763 for expression in date_formats(day_first):
1764 with contextlib.suppress(ValueError):
1765 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1766 return calendar.timegm(dt.timetuple())
1767 timetuple = email.utils.parsedate_tz(date_str)
1768 if timetuple:
1769 return calendar.timegm(timetuple) + pm_delta * 3600
1770
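# Illustrative example, assuming '%Y-%m-%d %H:%M:%S' is among the DATE_FORMATS
# defined earlier in this file (the bare timestamp is interpreted as UTC):
#   unified_timestamp('2014-12-31 18:39:15') -> 1420051155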
1771
1772 def determine_ext(url, default_ext='unknown_video'):
1773 if url is None or '.' not in url:
1774 return default_ext
1775 guess = url.partition('?')[0].rpartition('.')[2]
1776 if re.match(r'^[A-Za-z0-9]+$', guess):
1777 return guess
1778 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
1779 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1780 return guess.rstrip('/')
1781 else:
1782 return default_ext
1783
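# Illustrative examples:
#   determine_ext('http://example.com/video.mp4?download=1') -> 'mp4'
#   determine_ext('http://example.com/foo/bar.mp4/?download') -> 'mp4',
#   provided 'mp4' is in KNOWN_EXTENSIONS (defined elsewhere in this file)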
1784
1785 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1786 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1787
1788
1789 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1790 R"""
1791 Return a datetime object from a string.
1792 Supported format:
1793 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1794
1795 @param format strftime format of DATE
1796 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1797 auto: round to the unit provided in date_str (if applicable).
1798 """
1799 auto_precision = False
1800 if precision == 'auto':
1801 auto_precision = True
1802 precision = 'microsecond'
1803 today = datetime_round(datetime.datetime.utcnow(), precision)
1804 if date_str in ('now', 'today'):
1805 return today
1806 if date_str == 'yesterday':
1807 return today - datetime.timedelta(days=1)
1808 match = re.match(
1809 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1810 date_str)
1811 if match is not None:
1812 start_time = datetime_from_str(match.group('start'), precision, format)
1813 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1814 unit = match.group('unit')
1815 if unit == 'month' or unit == 'year':
1816 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1817 unit = 'day'
1818 else:
1819 if unit == 'week':
1820 unit = 'day'
1821 time *= 7
1822 delta = datetime.timedelta(**{unit + 's': time})
1823 new_date = start_time + delta
1824 if auto_precision:
1825 return datetime_round(new_date, unit)
1826 return new_date
1827
1828 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1829
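# Illustrative examples:
#   datetime_from_str('now-1week', precision='day') -> midnight (UTC) one week ago
#   datetime_from_str('20200101') -> datetime.datetime(2020, 1, 1, 0, 0)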
1830
1831 def date_from_str(date_str, format='%Y%m%d', strict=False):
1832 R"""
1833 Return a date object from a string using datetime_from_str
1834
1835 @param strict Restrict allowed patterns to "YYYYMMDD" and
1836 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1837 """
1838 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1839 raise ValueError(f'Invalid date format "{date_str}"')
1840 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1841
1842
1843 def datetime_add_months(dt, months):
1844 """Increment/Decrement a datetime object by months."""
1845 month = dt.month + months - 1
1846 year = dt.year + month // 12
1847 month = month % 12 + 1
1848 day = min(dt.day, calendar.monthrange(year, month)[1])
1849 return dt.replace(year, month, day)
1850
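# Illustrative example (the day is clamped to the target month's length):
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   -> datetime.datetime(2020, 2, 29, 0, 0)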
1851
1852 def datetime_round(dt, precision='day'):
1853 """
1854 Round a datetime object's time to a specific precision
1855 """
1856 if precision == 'microsecond':
1857 return dt
1858
1859 unit_seconds = {
1860 'day': 86400,
1861 'hour': 3600,
1862 'minute': 60,
1863 'second': 1,
1864 }
1865 roundto = lambda x, n: ((x + n / 2) // n) * n
1866 timestamp = calendar.timegm(dt.timetuple())
1867 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1868
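# Illustrative example (rounds half-up to the nearest unit):
#   datetime_round(datetime.datetime(2022, 6, 1, 13, 30), 'day')
#   -> datetime.datetime(2022, 6, 2, 0, 0)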
1869
1870 def hyphenate_date(date_str):
1871 """
1872 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1873 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1874 if match is not None:
1875 return '-'.join(match.groups())
1876 else:
1877 return date_str
1878
1879
1880 class DateRange:
1881 """Represents a time interval between two dates"""
1882
1883 def __init__(self, start=None, end=None):
1884 """start and end must be strings in the format accepted by date"""
1885 if start is not None:
1886 self.start = date_from_str(start, strict=True)
1887 else:
1888 self.start = datetime.datetime.min.date()
1889 if end is not None:
1890 self.end = date_from_str(end, strict=True)
1891 else:
1892 self.end = datetime.datetime.max.date()
1893 if self.start > self.end:
1894 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
1895
1896 @classmethod
1897 def day(cls, day):
1898 """Returns a range that only contains the given day"""
1899 return cls(day, day)
1900
1901 def __contains__(self, date):
1902 """Check if the date is in the range"""
1903 if not isinstance(date, datetime.date):
1904 date = date_from_str(date)
1905 return self.start <= date <= self.end
1906
1907 def __str__(self):
1908 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1909
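# Illustrative usage:
#   '20200115' in DateRange('20200101', '20200201') -> True
#   DateRange.day('20200115') contains only 2020-01-15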
1910
1911 def platform_name():
1912 """ Returns the platform name as a str """
1913 write_string('DeprecationWarning: yt_dlp.utils.platform_name is deprecated, use platform.platform instead')
1914 return platform.platform()
1915
1916
1917 @functools.cache
1918 def system_identifier():
1919 python_implementation = platform.python_implementation()
1920 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
1921 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
1922
1923 return 'Python %s (%s %s) - %s %s' % (
1924 platform.python_version(),
1925 python_implementation,
1926 platform.architecture()[0],
1927 platform.platform(),
1928 format_field(join_nonempty(*platform.libc_ver(), delim=' '), None, '(%s)'),
1929 )
1930
1931
1932 @functools.cache
1933 def get_windows_version():
1934 ''' Get Windows version. Returns () if it's not running on Windows '''
1935 if compat_os_name == 'nt':
1936 return version_tuple(platform.win32_ver()[1])
1937 else:
1938 return ()
1939
1940
1941 def write_string(s, out=None, encoding=None):
1942 assert isinstance(s, str)
1943 out = out or sys.stderr
1944
1945 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1946 s = re.sub(r'([\r\n]+)', r' \1', s)
1947
1948 enc, buffer = None, out
1949 if 'b' in getattr(out, 'mode', ''):
1950 enc = encoding or preferredencoding()
1951 elif hasattr(out, 'buffer'):
1952 buffer = out.buffer
1953 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1954
1955 buffer.write(s.encode(enc, 'ignore') if enc else s)
1956 out.flush()
1957
1958
1959 def bytes_to_intlist(bs):
1960 if not bs:
1961 return []
1962 if isinstance(bs[0], int): # Python 3
1963 return list(bs)
1964 else:
1965 return [ord(c) for c in bs]
1966
1967
1968 def intlist_to_bytes(xs):
1969 if not xs:
1970 return b''
1971 return struct.pack('%dB' % len(xs), *xs)
1972
1973
1974 class LockingUnsupportedError(OSError):
1975 msg = 'File locking is not supported'
1976
1977 def __init__(self):
1978 super().__init__(self.msg)
1979
1980
1981 # Cross-platform file locking
1982 if sys.platform == 'win32':
1983 import ctypes.wintypes
1984 import msvcrt
1985
1986 class OVERLAPPED(ctypes.Structure):
1987 _fields_ = [
1988 ('Internal', ctypes.wintypes.LPVOID),
1989 ('InternalHigh', ctypes.wintypes.LPVOID),
1990 ('Offset', ctypes.wintypes.DWORD),
1991 ('OffsetHigh', ctypes.wintypes.DWORD),
1992 ('hEvent', ctypes.wintypes.HANDLE),
1993 ]
1994
1995 kernel32 = ctypes.windll.kernel32
1996 LockFileEx = kernel32.LockFileEx
1997 LockFileEx.argtypes = [
1998 ctypes.wintypes.HANDLE, # hFile
1999 ctypes.wintypes.DWORD, # dwFlags
2000 ctypes.wintypes.DWORD, # dwReserved
2001 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2002 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2003 ctypes.POINTER(OVERLAPPED) # Overlapped
2004 ]
2005 LockFileEx.restype = ctypes.wintypes.BOOL
2006 UnlockFileEx = kernel32.UnlockFileEx
2007 UnlockFileEx.argtypes = [
2008 ctypes.wintypes.HANDLE, # hFile
2009 ctypes.wintypes.DWORD, # dwReserved
2010 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2011 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2012 ctypes.POINTER(OVERLAPPED) # Overlapped
2013 ]
2014 UnlockFileEx.restype = ctypes.wintypes.BOOL
2015 whole_low = 0xffffffff
2016 whole_high = 0x7fffffff
2017
2018 def _lock_file(f, exclusive, block):
2019 overlapped = OVERLAPPED()
2020 overlapped.Offset = 0
2021 overlapped.OffsetHigh = 0
2022 overlapped.hEvent = 0
2023 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2024
2025 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2026 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2027 0, whole_low, whole_high, f._lock_file_overlapped_p):
2028 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2029 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2030
2031 def _unlock_file(f):
2032 assert f._lock_file_overlapped_p
2033 handle = msvcrt.get_osfhandle(f.fileno())
2034 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2035 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2036
2037 else:
2038 try:
2039 import fcntl
2040
2041 def _lock_file(f, exclusive, block):
2042 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2043 if not block:
2044 flags |= fcntl.LOCK_NB
2045 try:
2046 fcntl.flock(f, flags)
2047 except BlockingIOError:
2048 raise
2049 except OSError: # AOSP does not have flock()
2050 fcntl.lockf(f, flags)
2051
2052 def _unlock_file(f):
2053 try:
2054 fcntl.flock(f, fcntl.LOCK_UN)
2055 except OSError:
2056 fcntl.lockf(f, fcntl.LOCK_UN)
2057
2058 except ImportError:
2059
2060 def _lock_file(f, exclusive, block):
2061 raise LockingUnsupportedError()
2062
2063 def _unlock_file(f):
2064 raise LockingUnsupportedError()
2065
2066
2067 class locked_file:
2068 locked = False
2069
2070 def __init__(self, filename, mode, block=True, encoding=None):
2071 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2072 raise NotImplementedError(mode)
2073 self.mode, self.block = mode, block
2074
2075 writable = any(f in mode for f in 'wax+')
2076 readable = any(f in mode for f in 'r+')
2077 flags = functools.reduce(operator.ior, (
2078 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2079 getattr(os, 'O_BINARY', 0), # Windows only
2080 getattr(os, 'O_NOINHERIT', 0), # Windows only
2081 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2082 os.O_APPEND if 'a' in mode else 0,
2083 os.O_EXCL if 'x' in mode else 0,
2084 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2085 ))
2086
2087 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2088
2089 def __enter__(self):
2090 exclusive = 'r' not in self.mode
2091 try:
2092 _lock_file(self.f, exclusive, self.block)
2093 self.locked = True
2094 except OSError:
2095 self.f.close()
2096 raise
2097 if 'w' in self.mode:
2098 try:
2099 self.f.truncate()
2100 except OSError as e:
2101 if e.errno not in (
2102 errno.ESPIPE, # Illegal seek - expected for FIFO
2103 errno.EINVAL, # Invalid argument - expected for /dev/null
2104 ):
2105 raise
2106 return self
2107
2108 def unlock(self):
2109 if not self.locked:
2110 return
2111 try:
2112 _unlock_file(self.f)
2113 finally:
2114 self.locked = False
2115
2116 def __exit__(self, *_):
2117 try:
2118 self.unlock()
2119 finally:
2120 self.f.close()
2121
2122 open = __enter__
2123 close = __exit__
2124
2125 def __getattr__(self, attr):
2126 return getattr(self.f, attr)
2127
2128 def __iter__(self):
2129 return iter(self.f)
2130
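# Illustrative usage ('state.txt' is a hypothetical file name):
#   with locked_file('state.txt', 'w', block=False) as f:
#       f.write('data')
# With block=False, entering the context raises (e.g. BlockingIOError) if
# another process already holds an incompatible lock on the file.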
2131
2132 @functools.cache
2133 def get_filesystem_encoding():
2134 encoding = sys.getfilesystemencoding()
2135 return encoding if encoding is not None else 'utf-8'
2136
2137
2138 def shell_quote(args):
2139 quoted_args = []
2140 encoding = get_filesystem_encoding()
2141 for a in args:
2142 if isinstance(a, bytes):
2143 # We may get a filename encoded with 'encodeFilename'
2144 a = a.decode(encoding)
2145 quoted_args.append(compat_shlex_quote(a))
2146 return ' '.join(quoted_args)
2147
2148
2149 def smuggle_url(url, data):
2150 """ Pass additional data in a URL for internal use. """
2151
2152 url, idata = unsmuggle_url(url, {})
2153 data.update(idata)
2154 sdata = urllib.parse.urlencode(
2155 {'__youtubedl_smuggle': json.dumps(data)})
2156 return url + '#' + sdata
2157
2158
2159 def unsmuggle_url(smug_url, default=None):
2160 if '#__youtubedl_smuggle' not in smug_url:
2161 return smug_url, default
2162 url, _, sdata = smug_url.rpartition('#')
2163 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2164 data = json.loads(jsond)
2165 return url, data
2166
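# Illustrative round trip:
#   url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
#   unsmuggle_url(url) -> ('https://example.com/video', {'referer': 'https://example.com/'})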
2167
2168 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2169 """ Formats numbers with decimal sufixes like K, M, etc """
2170 num, factor = float_or_none(num), float(factor)
2171 if num is None or num < 0:
2172 return None
2173 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2174 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2175 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2176 if factor == 1024:
2177 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2178 converted = num / (factor ** exponent)
2179 return fmt % (converted, suffix)
2180
2181
2182 def format_bytes(bytes):
2183 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2184
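# Illustrative examples (worked out by hand):
#   format_decimal_suffix(1234567, '%.1f%s') -> '1.2M'
#   format_bytes(1048576) -> '1.00MiB'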
2185
2186 def lookup_unit_table(unit_table, s):
2187 units_re = '|'.join(re.escape(u) for u in unit_table)
2188 m = re.match(
2189 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2190 if not m:
2191 return None
2192 num_str = m.group('num').replace(',', '.')
2193 mult = unit_table[m.group('unit')]
2194 return int(float(num_str) * mult)
2195
2196
2197 def parse_filesize(s):
2198 if s is None:
2199 return None
2200
2201 # The lower-case forms are of course incorrect and unofficial,
2202 # but we support those too
2203 _UNIT_TABLE = {
2204 'B': 1,
2205 'b': 1,
2206 'bytes': 1,
2207 'KiB': 1024,
2208 'KB': 1000,
2209 'kB': 1024,
2210 'Kb': 1000,
2211 'kb': 1000,
2212 'kilobytes': 1000,
2213 'kibibytes': 1024,
2214 'MiB': 1024 ** 2,
2215 'MB': 1000 ** 2,
2216 'mB': 1024 ** 2,
2217 'Mb': 1000 ** 2,
2218 'mb': 1000 ** 2,
2219 'megabytes': 1000 ** 2,
2220 'mebibytes': 1024 ** 2,
2221 'GiB': 1024 ** 3,
2222 'GB': 1000 ** 3,
2223 'gB': 1024 ** 3,
2224 'Gb': 1000 ** 3,
2225 'gb': 1000 ** 3,
2226 'gigabytes': 1000 ** 3,
2227 'gibibytes': 1024 ** 3,
2228 'TiB': 1024 ** 4,
2229 'TB': 1000 ** 4,
2230 'tB': 1024 ** 4,
2231 'Tb': 1000 ** 4,
2232 'tb': 1000 ** 4,
2233 'terabytes': 1000 ** 4,
2234 'tebibytes': 1024 ** 4,
2235 'PiB': 1024 ** 5,
2236 'PB': 1000 ** 5,
2237 'pB': 1024 ** 5,
2238 'Pb': 1000 ** 5,
2239 'pb': 1000 ** 5,
2240 'petabytes': 1000 ** 5,
2241 'pebibytes': 1024 ** 5,
2242 'EiB': 1024 ** 6,
2243 'EB': 1000 ** 6,
2244 'eB': 1024 ** 6,
2245 'Eb': 1000 ** 6,
2246 'eb': 1000 ** 6,
2247 'exabytes': 1000 ** 6,
2248 'exbibytes': 1024 ** 6,
2249 'ZiB': 1024 ** 7,
2250 'ZB': 1000 ** 7,
2251 'zB': 1024 ** 7,
2252 'Zb': 1000 ** 7,
2253 'zb': 1000 ** 7,
2254 'zettabytes': 1000 ** 7,
2255 'zebibytes': 1024 ** 7,
2256 'YiB': 1024 ** 8,
2257 'YB': 1000 ** 8,
2258 'yB': 1024 ** 8,
2259 'Yb': 1000 ** 8,
2260 'yb': 1000 ** 8,
2261 'yottabytes': 1000 ** 8,
2262 'yobibytes': 1024 ** 8,
2263 }
2264
2265 return lookup_unit_table(_UNIT_TABLE, s)
2266
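# Illustrative examples:
#   parse_filesize('1.5 GiB') -> 1610612736
#   parse_filesize('500 MB') -> 500000000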
2267
2268 def parse_count(s):
2269 if s is None:
2270 return None
2271
2272 s = re.sub(r'^[^\d]+\s', '', s).strip()
2273
2274 if re.match(r'^[\d,.]+$', s):
2275 return str_to_int(s)
2276
2277 _UNIT_TABLE = {
2278 'k': 1000,
2279 'K': 1000,
2280 'm': 1000 ** 2,
2281 'M': 1000 ** 2,
2282 'kk': 1000 ** 2,
2283 'KK': 1000 ** 2,
2284 'b': 1000 ** 3,
2285 'B': 1000 ** 3,
2286 }
2287
2288 ret = lookup_unit_table(_UNIT_TABLE, s)
2289 if ret is not None:
2290 return ret
2291
2292 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2293 if mobj:
2294 return str_to_int(mobj.group(1))
2295
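# Illustrative examples:
#   parse_count('1.2M') -> 1200000
#   parse_count('1,234 views') -> 1234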
2296
2297 def parse_resolution(s, *, lenient=False):
2298 if s is None:
2299 return {}
2300
2301 if lenient:
2302 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2303 else:
2304 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2305 if mobj:
2306 return {
2307 'width': int(mobj.group('w')),
2308 'height': int(mobj.group('h')),
2309 }
2310
2311 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2312 if mobj:
2313 return {'height': int(mobj.group(1))}
2314
2315 mobj = re.search(r'\b([48])[kK]\b', s)
2316 if mobj:
2317 return {'height': int(mobj.group(1)) * 540}
2318
2319 return {}
2320
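# Illustrative examples:
#   parse_resolution('1920x1080') -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p') -> {'height': 720}
#   parse_resolution('4K') -> {'height': 2160}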
2321
2322 def parse_bitrate(s):
2323 if not isinstance(s, str):
2324 return
2325 mobj = re.search(r'\b(\d+)\s*kbps', s)
2326 if mobj:
2327 return int(mobj.group(1))
2328
2329
2330 def month_by_name(name, lang='en'):
2331 """ Return the number of a month by (locale-independently) English name """
2332
2333 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2334
2335 try:
2336 return month_names.index(name) + 1
2337 except ValueError:
2338 return None
2339
2340
2341 def month_by_abbreviation(abbrev):
2342 """ Return the number of a month by (locale-independently) English
2343 abbreviations """
2344
2345 try:
2346 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2347 except ValueError:
2348 return None
2349
2350
2351 def fix_xml_ampersands(xml_str):
2352 """Replace all the '&' by '&amp;' in XML"""
2353 return re.sub(
2354 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2355 '&amp;',
2356 xml_str)
2357
2358
2359 def setproctitle(title):
2360 assert isinstance(title, str)
2361
2362 # ctypes in Jython is not complete
2363 # http://bugs.jython.org/issue2148
2364 if sys.platform.startswith('java'):
2365 return
2366
2367 try:
2368 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2369 except OSError:
2370 return
2371 except TypeError:
2372 # LoadLibrary in Windows Python 2.7.13 only expects
2373 # a bytestring, but since unicode_literals turns
2374 # every string into a unicode string, it fails.
2375 return
2376 title_bytes = title.encode()
2377 buf = ctypes.create_string_buffer(len(title_bytes))
2378 buf.value = title_bytes
2379 try:
2380 libc.prctl(15, buf, 0, 0, 0)
2381 except AttributeError:
2382 return # Strange libc, just skip this
2383
2384
2385 def remove_start(s, start):
2386 return s[len(start):] if s is not None and s.startswith(start) else s
2387
2388
2389 def remove_end(s, end):
2390 return s[:-len(end)] if s is not None and s.endswith(end) else s
2391
2392
2393 def remove_quotes(s):
2394 if s is None or len(s) < 2:
2395 return s
2396 for quote in ('"', "'", ):
2397 if s[0] == quote and s[-1] == quote:
2398 return s[1:-1]
2399 return s
2400
2401
2402 def get_domain(url):
2403 return '.'.join(urllib.parse.urlparse(url).netloc.rsplit('.', 2)[-2:])
2404
2405
2406 def url_basename(url):
2407 path = urllib.parse.urlparse(url).path
2408 return path.strip('/').split('/')[-1]
2409
2410
2411 def base_url(url):
2412 return re.match(r'https?://[^?#&]+/', url).group()
2413
2414
2415 def urljoin(base, path):
2416 if isinstance(path, bytes):
2417 path = path.decode()
2418 if not isinstance(path, str) or not path:
2419 return None
2420 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2421 return path
2422 if isinstance(base, bytes):
2423 base = base.decode()
2424 if not isinstance(base, str) or not re.match(
2425 r'^(?:https?:)?//', base):
2426 return None
2427 return urllib.parse.urljoin(base, path)
2428
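# Illustrative examples:
#   urljoin('https://example.com/a/', 'b/c.mp4') -> 'https://example.com/a/b/c.mp4'
#   urljoin('https://example.com', '//cdn.example.com/x.m3u8') -> '//cdn.example.com/x.m3u8'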
2429
2430 class HEADRequest(urllib.request.Request):
2431 def get_method(self):
2432 return 'HEAD'
2433
2434
2435 class PUTRequest(urllib.request.Request):
2436 def get_method(self):
2437 return 'PUT'
2438
2439
2440 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2441 if get_attr and v is not None:
2442 v = getattr(v, get_attr, None)
2443 try:
2444 return int(v) * invscale // scale
2445 except (ValueError, TypeError, OverflowError):
2446 return default
2447
2448
2449 def str_or_none(v, default=None):
2450 return default if v is None else str(v)
2451
2452
2453 def str_to_int(int_str):
2454 """ A more relaxed version of int_or_none """
2455 if isinstance(int_str, int):
2456 return int_str
2457 elif isinstance(int_str, str):
2458 int_str = re.sub(r'[,\.\+]', '', int_str)
2459 return int_or_none(int_str)
2460
2461
2462 def float_or_none(v, scale=1, invscale=1, default=None):
2463 if v is None:
2464 return default
2465 try:
2466 return float(v) * invscale / scale
2467 except (ValueError, TypeError):
2468 return default
2469
2470
2471 def bool_or_none(v, default=None):
2472 return v if isinstance(v, bool) else default
2473
2474
2475 def strip_or_none(v, default=None):
2476 return v.strip() if isinstance(v, str) else default
2477
2478
2479 def url_or_none(url):
2480 if not url or not isinstance(url, str):
2481 return None
2482 url = url.strip()
2483 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2484
2485
2486 def request_to_url(req):
2487 if isinstance(req, urllib.request.Request):
2488 return req.get_full_url()
2489 else:
2490 return req
2491
2492
2493 def strftime_or_none(timestamp, date_format, default=None):
2494 datetime_object = None
2495 try:
2496 if isinstance(timestamp, (int, float)): # unix timestamp
2497 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2498 elif isinstance(timestamp, str): # assume YYYYMMDD
2499 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2500 return datetime_object.strftime(date_format)
2501 except (ValueError, TypeError, AttributeError):
2502 return default
2503
2504
2505 def parse_duration(s):
2506 if not isinstance(s, str):
2507 return None
2508 s = s.strip()
2509 if not s:
2510 return None
2511
2512 days, hours, mins, secs, ms = [None] * 5
2513 m = re.match(r'''(?x)
2514 (?P<before_secs>
2515 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2516 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2517 (?P<ms>[.:][0-9]+)?Z?$
2518 ''', s)
2519 if m:
2520 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2521 else:
2522 m = re.match(
2523 r'''(?ix)(?:P?
2524 (?:
2525 [0-9]+\s*y(?:ears?)?,?\s*
2526 )?
2527 (?:
2528 [0-9]+\s*m(?:onths?)?,?\s*
2529 )?
2530 (?:
2531 [0-9]+\s*w(?:eeks?)?,?\s*
2532 )?
2533 (?:
2534 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2535 )?
2536 T)?
2537 (?:
2538 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2539 )?
2540 (?:
2541 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2542 )?
2543 (?:
2544 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2545 )?Z?$''', s)
2546 if m:
2547 days, hours, mins, secs, ms = m.groups()
2548 else:
2549 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2550 if m:
2551 hours, mins = m.groups()
2552 else:
2553 return None
2554
2555 if ms:
2556 ms = ms.replace(':', '.')
2557 return sum(float(part or 0) * mult for part, mult in (
2558 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2559
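# Illustrative examples (worked out by hand):
#   parse_duration('1:23:45') -> 5025.0
#   parse_duration('PT1H30M') -> 5400.0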
2560
2561 def prepend_extension(filename, ext, expected_real_ext=None):
2562 name, real_ext = os.path.splitext(filename)
2563 return (
2564 f'{name}.{ext}{real_ext}'
2565 if not expected_real_ext or real_ext[1:] == expected_real_ext
2566 else f'{filename}.{ext}')
2567
2568
2569 def replace_extension(filename, ext, expected_real_ext=None):
2570 name, real_ext = os.path.splitext(filename)
2571 return '{}.{}'.format(
2572 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2573 ext)
2574
2575
2576 def check_executable(exe, args=[]):
2577 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2578 args can be a list of arguments for a short output (like -version) """
2579 try:
2580 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2581 except OSError:
2582 return False
2583 return exe
2584
2585
2586 def _get_exe_version_output(exe, args, *, to_screen=None):
2587 if to_screen:
2588 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2589 try:
2590 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2591 # SIGTTOU if yt-dlp is run in the background.
2592 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2593 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2594 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2595 except OSError:
2596 return False
2597 return stdout
2598
2599
2600 def detect_exe_version(output, version_re=None, unrecognized='present'):
2601 assert isinstance(output, str)
2602 if version_re is None:
2603 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2604 m = re.search(version_re, output)
2605 if m:
2606 return m.group(1)
2607 else:
2608 return unrecognized
2609
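# Illustrative example with the default version_re:
#   detect_exe_version('ffmpeg version 4.4.1 Copyright (c) 2000-2021 ...') -> '4.4.1'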
2610
2611 def get_exe_version(exe, args=['--version'],
2612 version_re=None, unrecognized='present'):
2613 """ Returns the version of the specified executable,
2614 or False if the executable is not present """
2615 out = _get_exe_version_output(exe, args)
2616 return detect_exe_version(out, version_re, unrecognized) if out else False
2617
2618
2619 def frange(start=0, stop=None, step=1):
2620 """Float range"""
2621 if stop is None:
2622 start, stop = 0, start
2623 sign = [-1, 1][step > 0] if step else 0
2624 while sign * start < sign * stop:
2625 yield start
2626 start += step
2627
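# Illustrative examples:
#   list(frange(3)) -> [0, 1, 2]
#   list(frange(0, 1, 0.25)) -> [0, 0.25, 0.5, 0.75]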
2628
2629 class LazyList(collections.abc.Sequence):
2630 """Lazy immutable list from an iterable
2631 Note that slices of a LazyList are lists and not LazyList"""
2632
2633 class IndexError(IndexError):
2634 pass
2635
2636 def __init__(self, iterable, *, reverse=False, _cache=None):
2637 self._iterable = iter(iterable)
2638 self._cache = [] if _cache is None else _cache
2639 self._reversed = reverse
2640
2641 def __iter__(self):
2642 if self._reversed:
2643 # We need to consume the entire iterable to iterate in reverse
2644 yield from self.exhaust()
2645 return
2646 yield from self._cache
2647 for item in self._iterable:
2648 self._cache.append(item)
2649 yield item
2650
2651 def _exhaust(self):
2652 self._cache.extend(self._iterable)
2653 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2654 return self._cache
2655
2656 def exhaust(self):
2657 """Evaluate the entire iterable"""
2658 return self._exhaust()[::-1 if self._reversed else 1]
2659
2660 @staticmethod
2661 def _reverse_index(x):
2662 return None if x is None else -(x + 1)
2663
2664 def __getitem__(self, idx):
2665 if isinstance(idx, slice):
2666 if self._reversed:
2667 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2668 start, stop, step = idx.start, idx.stop, idx.step or 1
2669 elif isinstance(idx, int):
2670 if self._reversed:
2671 idx = self._reverse_index(idx)
2672 start, stop, step = idx, idx, 0
2673 else:
2674 raise TypeError('indices must be integers or slices')
2675 if ((start or 0) < 0 or (stop or 0) < 0
2676 or (start is None and step < 0)
2677 or (stop is None and step > 0)):
2678 # We need to consume the entire iterable to be able to slice from the end
2679 # Obviously, never use this with infinite iterables
2680 self._exhaust()
2681 try:
2682 return self._cache[idx]
2683 except IndexError as e:
2684 raise self.IndexError(e) from e
2685 n = max(start or 0, stop or 0) - len(self._cache) + 1
2686 if n > 0:
2687 self._cache.extend(itertools.islice(self._iterable, n))
2688 try:
2689 return self._cache[idx]
2690 except IndexError as e:
2691 raise self.IndexError(e) from e
2692
2693 def __bool__(self):
2694 try:
2695 self[-1] if self._reversed else self[0]
2696 except self.IndexError:
2697 return False
2698 return True
2699
2700 def __len__(self):
2701 self._exhaust()
2702 return len(self._cache)
2703
2704 def __reversed__(self):
2705 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2706
2707 def __copy__(self):
2708 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2709
2710 def __repr__(self):
2711 # repr and str should mimic a list. So we exhaust the iterable
2712 return repr(self.exhaust())
2713
2714 def __str__(self):
2715 return repr(self.exhaust())
2716
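# Illustrative usage (safe with infinite iterators as long as no negative or
# open-ended access forces full evaluation):
#   lazy = LazyList(itertools.count())
#   lazy[4] -> 4  (consumes only the first five items)
#   lazy[:3] -> [0, 1, 2]  (slices are plain lists)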
2717
2718 class PagedList:
2719
2720 class IndexError(IndexError):
2721 pass
2722
2723 def __len__(self):
2724 # This is only useful for tests
2725 return len(self.getslice())
2726
2727 def __init__(self, pagefunc, pagesize, use_cache=True):
2728 self._pagefunc = pagefunc
2729 self._pagesize = pagesize
2730 self._pagecount = float('inf')
2731 self._use_cache = use_cache
2732 self._cache = {}
2733
2734 def getpage(self, pagenum):
2735 page_results = self._cache.get(pagenum)
2736 if page_results is None:
2737 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2738 if self._use_cache:
2739 self._cache[pagenum] = page_results
2740 return page_results
2741
2742 def getslice(self, start=0, end=None):
2743 return list(self._getslice(start, end))
2744
2745 def _getslice(self, start, end):
2746 raise NotImplementedError('This method must be implemented by subclasses')
2747
2748 def __getitem__(self, idx):
2749 assert self._use_cache, 'Indexing PagedList requires cache'
2750 if not isinstance(idx, int) or idx < 0:
2751 raise TypeError('indices must be non-negative integers')
2752 entries = self.getslice(idx, idx + 1)
2753 if not entries:
2754 raise self.IndexError()
2755 return entries[0]
2756
2757
2758 class OnDemandPagedList(PagedList):
2759 """Download pages until a page with less than maximum results"""
2760
2761 def _getslice(self, start, end):
2762 for pagenum in itertools.count(start // self._pagesize):
2763 firstid = pagenum * self._pagesize
2764 nextfirstid = pagenum * self._pagesize + self._pagesize
2765 if start >= nextfirstid:
2766 continue
2767
2768 startv = (
2769 start % self._pagesize
2770 if firstid <= start < nextfirstid
2771 else 0)
2772 endv = (
2773 ((end - 1) % self._pagesize) + 1
2774 if (end is not None and firstid <= end <= nextfirstid)
2775 else None)
2776
2777 try:
2778 page_results = self.getpage(pagenum)
2779 except Exception:
2780 self._pagecount = pagenum - 1
2781 raise
2782 if startv != 0 or endv is not None:
2783 page_results = page_results[startv:endv]
2784 yield from page_results
2785
2786 # A little optimization: if the current page is not "full", i.e. does
2787 # not contain page_size videos, then we can assume that this page is
2788 # the last one - there are no more ids on further pages, so there is
2789 # no need to query again.
2790 if len(page_results) + startv < self._pagesize:
2791 break
2792
2793 # If we got the whole page, but the next page is not interesting,
2794 # break out early as well
2795 if end == nextfirstid:
2796 break
2797
2798
2799 class InAdvancePagedList(PagedList):
2800 """PagedList with total number of pages known in advance"""
2801
2802 def __init__(self, pagefunc, pagecount, pagesize):
2803 PagedList.__init__(self, pagefunc, pagesize, True)
2804 self._pagecount = pagecount
2805
2806 def _getslice(self, start, end):
2807 start_page = start // self._pagesize
2808 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2809 skip_elems = start - start_page * self._pagesize
2810 only_more = None if end is None else end - start
2811 for pagenum in range(start_page, end_page):
2812 page_results = self.getpage(pagenum)
2813 if skip_elems:
2814 page_results = page_results[skip_elems:]
2815 skip_elems = None
2816 if only_more is not None:
2817 if len(page_results) < only_more:
2818 only_more -= len(page_results)
2819 else:
2820 yield from page_results[:only_more]
2821 break
2822 yield from page_results
2823
2824
2825 class PlaylistEntries:
2826 MissingEntry = object()
2827 is_exhausted = False
2828
2829 def __init__(self, ydl, info_dict):
2830 self.ydl = ydl
2831
2832 # _entries must be assigned now since infodict can change during iteration
2833 entries = info_dict.get('entries')
2834 if entries is None:
2835 raise EntryNotInPlaylist('There are no entries')
2836 elif isinstance(entries, list):
2837 self.is_exhausted = True
2838
2839 requested_entries = info_dict.get('requested_entries')
2840 self.is_incomplete = bool(requested_entries)
2841 if self.is_incomplete:
2842 assert self.is_exhausted
2843 self._entries = [self.MissingEntry] * max(requested_entries)
2844 for i, entry in zip(requested_entries, entries):
2845 self._entries[i - 1] = entry
2846 elif isinstance(entries, (list, PagedList, LazyList)):
2847 self._entries = entries
2848 else:
2849 self._entries = LazyList(entries)
2850
2851 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2852 (?P<start>[+-]?\d+)?
2853 (?P<range>[:-]
2854 (?P<end>[+-]?\d+|inf(?:inite)?)?
2855 (?::(?P<step>[+-]?\d+))?
2856 )?''')
2857
2858 @classmethod
2859 def parse_playlist_items(cls, string):
2860 for segment in string.split(','):
2861 if not segment:
2862 raise ValueError('There are two or more consecutive commas')
2863 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2864 if not mobj:
2865 raise ValueError(f'{segment!r} is not a valid specification')
2866 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2867 if int_or_none(step) == 0:
2868 raise ValueError(f'Step in {segment!r} cannot be zero')
2869 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2870
2871 def get_requested_items(self):
2872 playlist_items = self.ydl.params.get('playlist_items')
2873 playlist_start = self.ydl.params.get('playliststart', 1)
2874 playlist_end = self.ydl.params.get('playlistend')
2875 # For backwards compatibility, interpret -1 as whole list
2876 if playlist_end in (-1, None):
2877 playlist_end = ''
2878 if not playlist_items:
2879 playlist_items = f'{playlist_start}:{playlist_end}'
2880 elif playlist_start != 1 or playlist_end:
2881 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2882
2883 for index in self.parse_playlist_items(playlist_items):
2884 for i, entry in self[index]:
2885 yield i, entry
2886 if not entry:
2887 continue
2888 try:
2889 # TODO: Add auto-generated fields
2890 self.ydl._match_entry(entry, incomplete=True, silent=True)
2891 except (ExistingVideoReached, RejectedVideoReached):
2892 return
2893
2894 def get_full_count(self):
2895 if self.is_exhausted and not self.is_incomplete:
2896 return len(self)
2897 elif isinstance(self._entries, InAdvancePagedList):
2898 if self._entries._pagesize == 1:
2899 return self._entries._pagecount
2900
2901 @functools.cached_property
2902 def _getter(self):
2903 if isinstance(self._entries, list):
2904 def get_entry(i):
2905 try:
2906 entry = self._entries[i]
2907 except IndexError:
2908 entry = self.MissingEntry
2909 if not self.is_incomplete:
2910 raise self.IndexError()
2911 if entry is self.MissingEntry:
2912 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2913 return entry
2914 else:
2915 def get_entry(i):
2916 try:
2917 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
2918 except (LazyList.IndexError, PagedList.IndexError):
2919 raise self.IndexError()
2920 return get_entry
2921
2922 def __getitem__(self, idx):
2923 if isinstance(idx, int):
2924 idx = slice(idx, idx)
2925
2926 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2927 step = 1 if idx.step is None else idx.step
2928 if idx.start is None:
2929 start = 0 if step > 0 else len(self) - 1
2930 else:
2931 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
2932
2933 # NB: Do not call len(self) when idx == [:]
2934 if idx.stop is None:
2935 stop = 0 if step < 0 else float('inf')
2936 else:
2937 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
2938 stop += [-1, 1][step > 0]
2939
2940 for i in frange(start, stop, step):
2941 if i < 0:
2942 continue
2943 try:
2944 entry = self._getter(i)
2945 except self.IndexError:
2946 self.is_exhausted = True
2947 if step > 0:
2948 break
2949 continue
2950 yield i + 1, entry
2951
2952 def __len__(self):
2953 return len(tuple(self[:]))
2954
2955 class IndexError(IndexError):
2956 pass
2957
2958
2959 def uppercase_escape(s):
2960 unicode_escape = codecs.getdecoder('unicode_escape')
2961 return re.sub(
2962 r'\\U[0-9a-fA-F]{8}',
2963 lambda m: unicode_escape(m.group(0))[0],
2964 s)
2965
2966
2967 def lowercase_escape(s):
2968 unicode_escape = codecs.getdecoder('unicode_escape')
2969 return re.sub(
2970 r'\\u[0-9a-fA-F]{4}',
2971 lambda m: unicode_escape(m.group(0))[0],
2972 s)
2973
2974
2975 def escape_rfc3986(s):
2976 """Escape non-ASCII characters as suggested by RFC 3986"""
2977 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2978
2979
2980 def escape_url(url):
2981 """Escape URL as suggested by RFC 3986"""
2982 url_parsed = urllib.parse.urlparse(url)
2983 return url_parsed._replace(
2984 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2985 path=escape_rfc3986(url_parsed.path),
2986 params=escape_rfc3986(url_parsed.params),
2987 query=escape_rfc3986(url_parsed.query),
2988 fragment=escape_rfc3986(url_parsed.fragment)
2989 ).geturl()
2990
2991
2992 def parse_qs(url):
2993 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
2994
2995
2996 def read_batch_urls(batch_fd):
2997 def fixup(url):
2998 if not isinstance(url, str):
2999 url = url.decode('utf-8', 'replace')
3000 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3001 for bom in BOM_UTF8:
3002 if url.startswith(bom):
3003 url = url[len(bom):]
3004 url = url.lstrip()
3005 if not url or url.startswith(('#', ';', ']')):
3006 return False
3007 # "#" cannot be stripped out since it is part of the URI
3008 # However, it can be safely stripped out if following a whitespace
3009 return re.split(r'\s#', url, 1)[0].rstrip()
3010
3011 with contextlib.closing(batch_fd) as fd:
3012 return [url for url in map(fixup, fd) if url]
3013
3014
3015 def urlencode_postdata(*args, **kargs):
3016 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3017
3018
3019 def update_url_query(url, query):
3020 if not query:
3021 return url
3022 parsed_url = urllib.parse.urlparse(url)
3023 qs = urllib.parse.parse_qs(parsed_url.query)
3024 qs.update(query)
3025 return urllib.parse.urlunparse(parsed_url._replace(
3026 query=urllib.parse.urlencode(qs, True)))
3027
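# Illustrative example:
#   update_url_query('https://example.com/api?a=1', {'b': '2'})
#   -> 'https://example.com/api?a=1&b=2'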
3028
3029 def update_Request(req, url=None, data=None, headers=None, query=None):
3030 req_headers = req.headers.copy()
3031 req_headers.update(headers or {})
3032 req_data = data or req.data
3033 req_url = update_url_query(url or req.get_full_url(), query)
3034 req_get_method = req.get_method()
3035 if req_get_method == 'HEAD':
3036 req_type = HEADRequest
3037 elif req_get_method == 'PUT':
3038 req_type = PUTRequest
3039 else:
3040 req_type = urllib.request.Request
3041 new_req = req_type(
3042 req_url, data=req_data, headers=req_headers,
3043 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3044 if hasattr(req, 'timeout'):
3045 new_req.timeout = req.timeout
3046 return new_req
3047
3048
3049 def _multipart_encode_impl(data, boundary):
3050 content_type = 'multipart/form-data; boundary=%s' % boundary
3051
3052 out = b''
3053 for k, v in data.items():
3054 out += b'--' + boundary.encode('ascii') + b'\r\n'
3055 if isinstance(k, str):
3056 k = k.encode()
3057 if isinstance(v, str):
3058 v = v.encode()
3059 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3060 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3061 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3062 if boundary.encode('ascii') in content:
3063 raise ValueError('Boundary overlaps with data')
3064 out += content
3065
3066 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3067
3068 return out, content_type
3069
3070
3071 def multipart_encode(data, boundary=None):
3072 '''
3073 Encode a dict to RFC 7578-compliant form-data
3074
3075 data:
3076 A dict where keys and values can be either Unicode or bytes-like
3077 objects.
3078 boundary:
3079 If specified, must be a Unicode object to use as the boundary. Otherwise
3080 a random boundary is generated.
3081
3082 Reference: https://tools.ietf.org/html/rfc7578
3083 '''
3084 has_specified_boundary = boundary is not None
3085
3086 while True:
3087 if boundary is None:
3088 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3089
3090 try:
3091 out, content_type = _multipart_encode_impl(data, boundary)
3092 break
3093 except ValueError:
3094 if has_specified_boundary:
3095 raise
3096 boundary = None
3097
3098 return out, content_type
3099
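# Illustrative example with a fixed boundary:
#   body, ctype = multipart_encode({'field': 'value'}, boundary='xxx')
#   body -> b'--xxx\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--xxx--\r\n'
#   ctype -> 'multipart/form-data; boundary=xxx'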
3100
3101 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3102 for val in map(d.get, variadic(key_or_keys)):
3103 if val is not None and (val or not skip_false_values):
3104 return val
3105 return default
3106
3107
3108 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3109 for f in funcs:
3110 try:
3111 val = f(*args, **kwargs)
3112 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
3113 pass
3114 else:
3115 if expected_type is None or isinstance(val, expected_type):
3116 return val
3117
3118
3119 def try_get(src, getter, expected_type=None):
3120 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3121
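# Illustrative examples:
#   try_get({'a': {'b': 1}}, lambda x: x['a']['b'], int) -> 1
#   try_get({}, lambda x: x['a']['b']) -> None  (the KeyError is swallowed)
#   dict_get({'a': '', 'b': 'x'}, ('a', 'b')) -> 'x'  (falsy values are skipped)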
3122
3123 def filter_dict(dct, cndn=lambda _, v: v is not None):
3124 return {k: v for k, v in dct.items() if cndn(k, v)}
3125
3126
3127 def merge_dicts(*dicts):
3128 merged = {}
3129 for a_dict in dicts:
3130 for k, v in a_dict.items():
3131 if (v is not None and k not in merged
3132 or isinstance(v, str) and merged[k] == ''):
3133 merged[k] = v
3134 return merged
3135
3136
3137 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3138 return string if isinstance(string, str) else str(string, encoding, errors)
3139
3140
3141 US_RATINGS = {
3142 'G': 0,
3143 'PG': 10,
3144 'PG-13': 13,
3145 'R': 16,
3146 'NC': 18,
3147 }
3148
3149
3150 TV_PARENTAL_GUIDELINES = {
3151 'TV-Y': 0,
3152 'TV-Y7': 7,
3153 'TV-G': 0,
3154 'TV-PG': 0,
3155 'TV-14': 14,
3156 'TV-MA': 17,
3157 }
3158
3159
3160 def parse_age_limit(s):
3161 # isinstance(False, int) is True. So type() must be used instead
3162 if type(s) is int: # noqa: E721
3163 return s if 0 <= s <= 21 else None
3164 elif not isinstance(s, str):
3165 return None
3166 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3167 if m:
3168 return int(m.group('age'))
3169 s = s.upper()
3170 if s in US_RATINGS:
3171 return US_RATINGS[s]
3172 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3173 if m:
3174 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3175 return None
3176
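# Illustrative examples:
#   parse_age_limit('18+') -> 18
#   parse_age_limit('TV-MA') -> 17
#   parse_age_limit(False) -> None  (bool is rejected by the type() check)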
3177
3178 def strip_jsonp(code):
3179 return re.sub(
3180 r'''(?sx)^
3181 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3182 (?:\s*&&\s*(?P=func_name))?
3183 \s*\(\s*(?P<callback_data>.*)\);?
3184 \s*?(?://[^\n]*)*$''',
3185 r'\g<callback_data>', code)
3186
3187
3188 def js_to_json(code, vars={}):
3189 # vars is a dict of var, val pairs to substitute
3190 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3191 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3192 INTEGER_TABLE = (
3193 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3194 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3195 )
3196
3197 def fix_kv(m):
3198 v = m.group(0)
3199 if v in ('true', 'false', 'null'):
3200 return v
3201 elif v in ('undefined', 'void 0'):
3202 return 'null'
3203 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3204 return ""
3205
3206 if v[0] in ("'", '"'):
3207 v = re.sub(r'(?s)\\.|"', lambda m: {
3208 '"': '\\"',
3209 "\\'": "'",
3210 '\\\n': '',
3211 '\\x': '\\u00',
3212 }.get(m.group(0), m.group(0)), v[1:-1])
3213 else:
3214 for regex, base in INTEGER_TABLE:
3215 im = re.match(regex, v)
3216 if im:
3217 i = int(im.group(1), base)
3218 return '"%d":' % i if v.endswith(':') else '%d' % i
3219
3220 if v in vars:
3221 return vars[v]
3222
3223 return '"%s"' % v
3224
3225 def create_map(mobj):
3226 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3227
3228 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3229 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3230
3231 return re.sub(r'''(?sx)
3232 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3233 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3234 {comment}|,(?={skip}[\]}}])|
3235 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3236 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3237 [0-9]+(?={skip}:)|
3238 !+
3239 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3240
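# Illustrative example (worked out by hand):
#   js_to_json("{foo: 'bar', baz: 0x10}") -> '{"foo": "bar", "baz": 16}'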
3241
3242 def qualities(quality_ids):
3243 """ Get a numeric quality value out of a list of possible values """
3244 def q(qid):
3245 try:
3246 return quality_ids.index(qid)
3247 except ValueError:
3248 return -1
3249 return q
3250
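# Illustrative usage:
#   q = qualities(['240p', '360p', '720p'])
#   q('720p') -> 2, q('4k') -> -1  (unknown values sort lowest)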
3251
3252 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3253
3254
3255 DEFAULT_OUTTMPL = {
3256 'default': '%(title)s [%(id)s].%(ext)s',
3257 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3258 }
3259 OUTTMPL_TYPES = {
3260 'chapter': None,
3261 'subtitle': None,
3262 'thumbnail': None,
3263 'description': 'description',
3264 'annotation': 'annotations.xml',
3265 'infojson': 'info.json',
3266 'link': None,
3267 'pl_video': None,
3268 'pl_thumbnail': None,
3269 'pl_description': 'description',
3270 'pl_infojson': 'info.json',
3271 }
3272
3273 # As of [1] format syntax is:
3274 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3275 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3276 STR_FORMAT_RE_TMPL = r'''(?x)
3277 (?<!%)(?P<prefix>(?:%%)*)
3278 %
3279 (?P<has_key>\((?P<key>{0})\))?
3280 (?P<format>
3281 (?P<conversion>[#0\-+ ]+)?
3282 (?P<min_width>\d+)?
3283 (?P<precision>\.\d+)?
3284 (?P<len_mod>[hlL])? # unused in python
3285 {1} # conversion type
3286 )
3287 '''
3288
3289
3290 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3291
3292
3293 def limit_length(s, length):
3294 """ Add ellipses to overly long strings """
3295 if s is None:
3296 return None
3297 ELLIPSES = '...'
3298 if len(s) > length:
3299 return s[:length - len(ELLIPSES)] + ELLIPSES
3300 return s
3301
3302
3303 def version_tuple(v):
3304 return tuple(int(e) for e in re.split(r'[-.]', v))
3305
3306
3307 def is_outdated_version(version, limit, assume_new=True):
3308 if not version:
3309 return not assume_new
3310 try:
3311 return version_tuple(version) < version_tuple(limit)
3312 except ValueError:
3313 return not assume_new
3314
3315
3316 def ytdl_is_updateable():
3317 """ Returns if yt-dlp can be updated with -U """
3318
3319 from .update import is_non_updateable
3320
3321 return not is_non_updateable()
3322
3323
3324 def args_to_str(args):
3325 # Get a short string representation for a subprocess command
3326 return ' '.join(compat_shlex_quote(a) for a in args)
3327
3328
3329 def error_to_compat_str(err):
3330 return str(err)
3331
3332
3333 def error_to_str(err):
3334 return f'{type(err).__name__}: {err}'
3335
3336
3337 def mimetype2ext(mt):
3338 if mt is None:
3339 return None
3340
3341 mt, _, params = mt.partition(';')
3342 mt = mt.strip()
3343
3344 FULL_MAP = {
3345 'audio/mp4': 'm4a',
3346 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3347 # since it's the most popular one
3348 'audio/mpeg': 'mp3',
3349 'audio/x-wav': 'wav',
3350 'audio/wav': 'wav',
3351 'audio/wave': 'wav',
3352 }
3353
3354 ext = FULL_MAP.get(mt)
3355 if ext is not None:
3356 return ext
3357
3358 SUBTYPE_MAP = {
3359 '3gpp': '3gp',
3360 'smptett+xml': 'tt',
3361 'ttaf+xml': 'dfxp',
3362 'ttml+xml': 'ttml',
3363 'x-flv': 'flv',
3364 'x-mp4-fragmented': 'mp4',
3365 'x-ms-sami': 'sami',
3366 'x-ms-wmv': 'wmv',
3367 'mpegurl': 'm3u8',
3368 'x-mpegurl': 'm3u8',
3369 'vnd.apple.mpegurl': 'm3u8',
3370 'dash+xml': 'mpd',
3371 'f4m+xml': 'f4m',
3372 'hds+xml': 'f4m',
3373 'vnd.ms-sstr+xml': 'ism',
3374 'quicktime': 'mov',
3375 'mp2t': 'ts',
3376 'x-wav': 'wav',
3377 'filmstrip+json': 'fs',
3378 'svg+xml': 'svg',
3379 }
3380
3381 _, _, subtype = mt.rpartition('/')
3382 ext = SUBTYPE_MAP.get(subtype.lower())
3383 if ext is not None:
3384 return ext
3385
3386 SUFFIX_MAP = {
3387 'json': 'json',
3388 'xml': 'xml',
3389 'zip': 'zip',
3390 'gzip': 'gz',
3391 }
3392
3393 _, _, suffix = subtype.partition('+')
3394 ext = SUFFIX_MAP.get(suffix)
3395 if ext is not None:
3396 return ext
3397
3398 return subtype.replace('+', '.')
3399
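# Illustrative examples:
#   mimetype2ext('audio/mpeg') -> 'mp3'
#   mimetype2ext('application/vnd.apple.mpegurl') -> 'm3u8'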
3400
3401 def ext2mimetype(ext_or_url):
3402 if not ext_or_url:
3403 return None
3404 if '.' not in ext_or_url:
3405 ext_or_url = f'file.{ext_or_url}'
3406 return mimetypes.guess_type(ext_or_url)[0]
3407
3408
3409 def parse_codecs(codecs_str):
3410 # http://tools.ietf.org/html/rfc6381
3411 if not codecs_str:
3412 return {}
3413 split_codecs = list(filter(None, map(
3414 str.strip, codecs_str.strip().strip(',').split(','))))
3415 vcodec, acodec, scodec, hdr = None, None, None, None
3416 for full_codec in split_codecs:
3417 parts = full_codec.split('.')
3418 codec = parts[0].replace('0', '')
3419 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3420 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3421 if not vcodec:
3422 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3423 if codec in ('dvh1', 'dvhe'):
3424 hdr = 'DV'
3425 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3426 hdr = 'HDR10'
3427 elif full_codec.replace('0', '').startswith('vp9.2'):
3428 hdr = 'HDR10'
3429 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3430 if not acodec:
3431 acodec = full_codec
3432 elif codec in ('stpp', 'wvtt',):
3433 if not scodec:
3434 scodec = full_codec
3435 else:
3436 write_string(f'WARNING: Unknown codec {full_codec}\n')
3437 if vcodec or acodec or scodec:
3438 return {
3439 'vcodec': vcodec or 'none',
3440 'acodec': acodec or 'none',
3441 'dynamic_range': hdr,
3442 **({'scodec': scodec} if scodec is not None else {}),
3443 }
3444 elif len(split_codecs) == 2:
3445 return {
3446 'vcodec': split_codecs[0],
3447 'acodec': split_codecs[1],
3448 }
3449 return {}
3450
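# Illustrative example:
#   parse_codecs('avc1.64001f, mp4a.40.2')
#   -> {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}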
3451
3452 def urlhandle_detect_ext(url_handle):
3453 getheader = url_handle.headers.get
3454
3455 cd = getheader('Content-Disposition')
3456 if cd:
3457 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3458 if m:
3459 e = determine_ext(m.group('filename'), default_ext=None)
3460 if e:
3461 return e
3462
3463 return mimetype2ext(getheader('Content-Type'))
3464
3465
3466 def encode_data_uri(data, mime_type):
3467 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3468
3469
3470 def age_restricted(content_limit, age_limit):
3471 """ Returns True iff the content should be blocked """
3472
3473 if age_limit is None: # No limit set
3474 return False
3475 if content_limit is None:
3476 return False # Content available for everyone
3477 return age_limit < content_limit
3478
3479
3480 def is_html(first_bytes):
3481 """ Detect whether a file contains HTML by examining its first bytes. """
3482
3483 BOMS = [
3484 (b'\xef\xbb\xbf', 'utf-8'),
3485 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3486 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3487 (b'\xff\xfe', 'utf-16-le'),
3488 (b'\xfe\xff', 'utf-16-be'),
3489 ]
3490
3491 encoding = 'utf-8'
3492 for bom, enc in BOMS:
3493 while first_bytes.startswith(bom):
3494 encoding, first_bytes = enc, first_bytes[len(bom):]
3495
3496 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3497
3498
3499 def determine_protocol(info_dict):
3500 protocol = info_dict.get('protocol')
3501 if protocol is not None:
3502 return protocol
3503
3504 url = sanitize_url(info_dict['url'])
3505 if url.startswith('rtmp'):
3506 return 'rtmp'
3507 elif url.startswith('mms'):
3508 return 'mms'
3509 elif url.startswith('rtsp'):
3510 return 'rtsp'
3511
3512 ext = determine_ext(url)
3513 if ext == 'm3u8':
3514 return 'm3u8'
3515 elif ext == 'f4m':
3516 return 'f4m'
3517
3518 return urllib.parse.urlparse(url).scheme
3519
3520
3521 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3522 """ Render a list of rows, each as a list of values.
3523 Text after a \t will be right aligned """
3524 def width(string):
3525 return len(remove_terminal_sequences(string).replace('\t', ''))
3526
3527 def get_max_lens(table):
3528 return [max(width(str(v)) for v in col) for col in zip(*table)]
3529
3530 def filter_using_list(row, filterArray):
3531 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3532
3533 max_lens = get_max_lens(data) if hide_empty else []
3534 header_row = filter_using_list(header_row, max_lens)
3535 data = [filter_using_list(row, max_lens) for row in data]
3536
3537 table = [header_row] + data
3538 max_lens = get_max_lens(table)
3539 extra_gap += 1
3540 if delim:
3541 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3542 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3543 for row in table:
3544 for pos, text in enumerate(map(str, row)):
3545 if '\t' in text:
3546 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3547 else:
3548 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3549 ret = '\n'.join(''.join(row).rstrip() for row in table)
3550 return ret
3551
3552
3553 def _match_one(filter_part, dct, incomplete):
3554 # TODO: Generalize code with YoutubeDL._build_format_filter
3555 STRING_OPERATORS = {
3556 '*=': operator.contains,
3557 '^=': lambda attr, value: attr.startswith(value),
3558 '$=': lambda attr, value: attr.endswith(value),
3559 '~=': lambda attr, value: re.search(value, attr),
3560 }
3561 COMPARISON_OPERATORS = {
3562 **STRING_OPERATORS,
3563 '<=': operator.le, # "<=" must be defined above "<"
3564 '<': operator.lt,
3565 '>=': operator.ge,
3566 '>': operator.gt,
3567 '=': operator.eq,
3568 }
3569
3570 if isinstance(incomplete, bool):
3571 is_incomplete = lambda _: incomplete
3572 else:
3573 is_incomplete = lambda k: k in incomplete
3574
3575 operator_rex = re.compile(r'''(?x)
3576 (?P<key>[a-z_]+)
3577 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3578 (?:
3579 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3580 (?P<strval>.+?)
3581 )
3582 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3583 m = operator_rex.fullmatch(filter_part.strip())
3584 if m:
3585 m = m.groupdict()
3586 unnegated_op = COMPARISON_OPERATORS[m['op']]
3587 if m['negation']:
3588 op = lambda attr, value: not unnegated_op(attr, value)
3589 else:
3590 op = unnegated_op
3591         comparison_value = m['quotedstrval'] or m['strval']
3592 if m['quote']:
3593 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3594 actual_value = dct.get(m['key'])
3595 numeric_comparison = None
3596 if isinstance(actual_value, (int, float)):
3597             # If the original field is a string and the matching comparison value is
3598 # a number we should respect the origin of the original field
3599 # and process comparison value as a string (see
3600 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3601 try:
3602 numeric_comparison = int(comparison_value)
3603 except ValueError:
3604 numeric_comparison = parse_filesize(comparison_value)
3605 if numeric_comparison is None:
3606 numeric_comparison = parse_filesize(f'{comparison_value}B')
3607 if numeric_comparison is None:
3608 numeric_comparison = parse_duration(comparison_value)
3609 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3610 raise ValueError('Operator %s only supports string values!' % m['op'])
3611 if actual_value is None:
3612 return is_incomplete(m['key']) or m['none_inclusive']
3613 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3614
3615 UNARY_OPERATORS = {
3616 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3617 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3618 }
3619 operator_rex = re.compile(r'''(?x)
3620 (?P<op>%s)\s*(?P<key>[a-z_]+)
3621 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3622 m = operator_rex.fullmatch(filter_part.strip())
3623 if m:
3624 op = UNARY_OPERATORS[m.group('op')]
3625 actual_value = dct.get(m.group('key'))
3626 if is_incomplete(m.group('key')) and actual_value is None:
3627 return True
3628 return op(actual_value)
3629
3630 raise ValueError('Invalid filter part %r' % filter_part)
3631
3632
3633 def match_str(filter_str, dct, incomplete=False):
3634 """ Filter a dictionary with a simple string syntax.
3635 @returns Whether the filter passes
3636     @param incomplete  Set of keys that are expected to be missing from dct.
3637 Can be True/False to indicate all/none of the keys may be missing.
3638 All conditions on incomplete keys pass if the key is missing
3639 """
3640 return all(
3641 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3642 for filter_part in re.split(r'(?<!\\)&', filter_str))
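# Illustrative usage (editor's sketch; field values are made up):
#   >>> match_str('like_count > 100 & description', {'like_count': 190, 'description': 'text'})
#   True
#   >>> match_str('!is_live', {'is_live': True})
#   False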
3643
3644
3645 def match_filter_func(filters):
3646 if not filters:
3647 return None
3648 filters = set(variadic(filters))
3649
3650 interactive = '-' in filters
3651 if interactive:
3652 filters.remove('-')
3653
3654 def _match_func(info_dict, incomplete=False):
3655 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3656 return NO_DEFAULT if interactive and not incomplete else None
3657 else:
3658 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3659 filter_str = ') | ('.join(map(str.strip, filters))
3660 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3661 return _match_func
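# Illustrative usage (editor's sketch): the returned function yields None when the
# video passes the filter, and a skip message otherwise:
#   >>> f = match_filter_func('duration > 60')
#   >>> f({'duration': 120}) is None
#   True
#   >>> f({'id': 'abc', 'duration': 30})
#   'abc does not pass filter (duration > 60), skipping ..'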
3662
3663
3664 def download_range_func(chapters, ranges):
3665 def inner(info_dict, ydl):
3666 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3667 else 'Cannot match chapters since chapter information is unavailable')
3668 for regex in chapters or []:
3669 for i, chapter in enumerate(info_dict.get('chapters') or []):
3670 if re.search(regex, chapter['title']):
3671 warning = None
3672 yield {**chapter, 'index': i}
3673 if chapters and warning:
3674 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3675
3676 yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
3677
3678 return inner
3679
3680
3681 def parse_dfxp_time_expr(time_expr):
3682 if not time_expr:
3683 return
3684
3685 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3686 if mobj:
3687 return float(mobj.group('time_offset'))
3688
3689 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3690 if mobj:
3691 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
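# Illustrative usage (editor's sketch): both plain seconds and clock notation are accepted:
#   >>> parse_dfxp_time_expr('5.1s')
#   5.1
#   >>> parse_dfxp_time_expr('00:01:30.5')
#   90.5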
3692
3693
3694 def srt_subtitles_timecode(seconds):
3695 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3696
3697
3698 def ass_subtitles_timecode(seconds):
3699 time = timetuple_from_msec(seconds * 1000)
3700 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
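# Illustrative usage of the two timecode helpers (editor's sketch):
#   >>> srt_subtitles_timecode(3.5)
#   '00:00:03,500'
#   >>> ass_subtitles_timecode(3.5)
#   '0:00:03.50'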
3701
3702
3703 def dfxp2srt(dfxp_data):
3704 '''
3705 @param dfxp_data A bytes-like object containing DFXP data
3706 @returns A unicode object containing converted SRT data
3707 '''
3708 LEGACY_NAMESPACES = (
3709 (b'http://www.w3.org/ns/ttml', [
3710 b'http://www.w3.org/2004/11/ttaf1',
3711 b'http://www.w3.org/2006/04/ttaf1',
3712 b'http://www.w3.org/2006/10/ttaf1',
3713 ]),
3714 (b'http://www.w3.org/ns/ttml#styling', [
3715 b'http://www.w3.org/ns/ttml#style',
3716 ]),
3717 )
3718
3719 SUPPORTED_STYLING = [
3720 'color',
3721 'fontFamily',
3722 'fontSize',
3723 'fontStyle',
3724 'fontWeight',
3725 'textDecoration'
3726 ]
3727
3728 _x = functools.partial(xpath_with_ns, ns_map={
3729 'xml': 'http://www.w3.org/XML/1998/namespace',
3730 'ttml': 'http://www.w3.org/ns/ttml',
3731 'tts': 'http://www.w3.org/ns/ttml#styling',
3732 })
3733
3734 styles = {}
3735 default_style = {}
3736
3737 class TTMLPElementParser:
3738 _out = ''
3739 _unclosed_elements = []
3740 _applied_styles = []
3741
3742 def start(self, tag, attrib):
3743 if tag in (_x('ttml:br'), 'br'):
3744 self._out += '\n'
3745 else:
3746 unclosed_elements = []
3747 style = {}
3748 element_style_id = attrib.get('style')
3749 if default_style:
3750 style.update(default_style)
3751 if element_style_id:
3752 style.update(styles.get(element_style_id, {}))
3753 for prop in SUPPORTED_STYLING:
3754 prop_val = attrib.get(_x('tts:' + prop))
3755 if prop_val:
3756 style[prop] = prop_val
3757 if style:
3758 font = ''
3759 for k, v in sorted(style.items()):
3760 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3761 continue
3762 if k == 'color':
3763 font += ' color="%s"' % v
3764 elif k == 'fontSize':
3765 font += ' size="%s"' % v
3766 elif k == 'fontFamily':
3767 font += ' face="%s"' % v
3768 elif k == 'fontWeight' and v == 'bold':
3769 self._out += '<b>'
3770 unclosed_elements.append('b')
3771 elif k == 'fontStyle' and v == 'italic':
3772 self._out += '<i>'
3773 unclosed_elements.append('i')
3774 elif k == 'textDecoration' and v == 'underline':
3775 self._out += '<u>'
3776 unclosed_elements.append('u')
3777 if font:
3778 self._out += '<font' + font + '>'
3779 unclosed_elements.append('font')
3780 applied_style = {}
3781 if self._applied_styles:
3782 applied_style.update(self._applied_styles[-1])
3783 applied_style.update(style)
3784 self._applied_styles.append(applied_style)
3785 self._unclosed_elements.append(unclosed_elements)
3786
3787 def end(self, tag):
3788 if tag not in (_x('ttml:br'), 'br'):
3789 unclosed_elements = self._unclosed_elements.pop()
3790 for element in reversed(unclosed_elements):
3791 self._out += '</%s>' % element
3792 if unclosed_elements and self._applied_styles:
3793 self._applied_styles.pop()
3794
3795 def data(self, data):
3796 self._out += data
3797
3798 def close(self):
3799 return self._out.strip()
3800
3801 def parse_node(node):
3802 target = TTMLPElementParser()
3803 parser = xml.etree.ElementTree.XMLParser(target=target)
3804 parser.feed(xml.etree.ElementTree.tostring(node))
3805 return parser.close()
3806
3807 for k, v in LEGACY_NAMESPACES:
3808 for ns in v:
3809 dfxp_data = dfxp_data.replace(ns, k)
3810
3811 dfxp = compat_etree_fromstring(dfxp_data)
3812 out = []
3813 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3814
3815 if not paras:
3816 raise ValueError('Invalid dfxp/TTML subtitle')
3817
3818 repeat = False
3819 while True:
3820 for style in dfxp.findall(_x('.//ttml:style')):
3821 style_id = style.get('id') or style.get(_x('xml:id'))
3822 if not style_id:
3823 continue
3824 parent_style_id = style.get('style')
3825 if parent_style_id:
3826 if parent_style_id not in styles:
3827 repeat = True
3828 continue
3829 styles[style_id] = styles[parent_style_id].copy()
3830 for prop in SUPPORTED_STYLING:
3831 prop_val = style.get(_x('tts:' + prop))
3832 if prop_val:
3833 styles.setdefault(style_id, {})[prop] = prop_val
3834 if repeat:
3835 repeat = False
3836 else:
3837 break
3838
3839 for p in ('body', 'div'):
3840 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3841 if ele is None:
3842 continue
3843 style = styles.get(ele.get('style'))
3844 if not style:
3845 continue
3846 default_style.update(style)
3847
3848 for para, index in zip(paras, itertools.count(1)):
3849 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3850 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3851 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3852 if begin_time is None:
3853 continue
3854 if not end_time:
3855 if not dur:
3856 continue
3857 end_time = begin_time + dur
3858 out.append('%d\n%s --> %s\n%s\n\n' % (
3859 index,
3860 srt_subtitles_timecode(begin_time),
3861 srt_subtitles_timecode(end_time),
3862 parse_node(para)))
3863
3864 return ''.join(out)
3865
3866
3867 def cli_option(params, command_option, param, separator=None):
3868 param = params.get(param)
3869 return ([] if param is None
3870 else [command_option, str(param)] if separator is None
3871 else [f'{command_option}{separator}{param}'])
3872
3873
3874 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3875 param = params.get(param)
3876 assert param in (True, False, None)
3877 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3878
3879
3880 def cli_valueless_option(params, command_option, param, expected_value=True):
3881 return [command_option] if params.get(param) == expected_value else []
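# Illustrative usage of the cli_* helpers (editor's sketch; option and param names are made up):
#   >>> cli_option({'proxy': 'socks5://127.0.0.1'}, '--proxy', 'proxy')
#   ['--proxy', 'socks5://127.0.0.1']
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']
#   >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#   ['--quiet']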
3882
3883
3884 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3885 if isinstance(argdict, (list, tuple)): # for backward compatibility
3886 if use_compat:
3887 return argdict
3888 else:
3889 argdict = None
3890 if argdict is None:
3891 return default
3892 assert isinstance(argdict, dict)
3893
3894 assert isinstance(keys, (list, tuple))
3895 for key_list in keys:
3896 arg_list = list(filter(
3897 lambda x: x is not None,
3898 [argdict.get(key.lower()) for key in variadic(key_list)]))
3899 if arg_list:
3900 return [arg for args in arg_list for arg in args]
3901 return default
3902
3903
3904 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3905 main_key, exe = main_key.lower(), exe.lower()
3906 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3907 keys = [f'{root_key}{k}' for k in (keys or [''])]
3908 if root_key in keys:
3909 if main_key != exe:
3910 keys.append((main_key, exe))
3911 keys.append('default')
3912 else:
3913 use_compat = False
3914 return cli_configuration_args(argdict, keys, default, use_compat)
3915
3916
3917 class ISO639Utils:
3918 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3919 _lang_map = {
3920 'aa': 'aar',
3921 'ab': 'abk',
3922 'ae': 'ave',
3923 'af': 'afr',
3924 'ak': 'aka',
3925 'am': 'amh',
3926 'an': 'arg',
3927 'ar': 'ara',
3928 'as': 'asm',
3929 'av': 'ava',
3930 'ay': 'aym',
3931 'az': 'aze',
3932 'ba': 'bak',
3933 'be': 'bel',
3934 'bg': 'bul',
3935 'bh': 'bih',
3936 'bi': 'bis',
3937 'bm': 'bam',
3938 'bn': 'ben',
3939 'bo': 'bod',
3940 'br': 'bre',
3941 'bs': 'bos',
3942 'ca': 'cat',
3943 'ce': 'che',
3944 'ch': 'cha',
3945 'co': 'cos',
3946 'cr': 'cre',
3947 'cs': 'ces',
3948 'cu': 'chu',
3949 'cv': 'chv',
3950 'cy': 'cym',
3951 'da': 'dan',
3952 'de': 'deu',
3953 'dv': 'div',
3954 'dz': 'dzo',
3955 'ee': 'ewe',
3956 'el': 'ell',
3957 'en': 'eng',
3958 'eo': 'epo',
3959 'es': 'spa',
3960 'et': 'est',
3961 'eu': 'eus',
3962 'fa': 'fas',
3963 'ff': 'ful',
3964 'fi': 'fin',
3965 'fj': 'fij',
3966 'fo': 'fao',
3967 'fr': 'fra',
3968 'fy': 'fry',
3969 'ga': 'gle',
3970 'gd': 'gla',
3971 'gl': 'glg',
3972 'gn': 'grn',
3973 'gu': 'guj',
3974 'gv': 'glv',
3975 'ha': 'hau',
3976 'he': 'heb',
3977 'iw': 'heb', # Replaced by he in 1989 revision
3978 'hi': 'hin',
3979 'ho': 'hmo',
3980 'hr': 'hrv',
3981 'ht': 'hat',
3982 'hu': 'hun',
3983 'hy': 'hye',
3984 'hz': 'her',
3985 'ia': 'ina',
3986 'id': 'ind',
3987 'in': 'ind', # Replaced by id in 1989 revision
3988 'ie': 'ile',
3989 'ig': 'ibo',
3990 'ii': 'iii',
3991 'ik': 'ipk',
3992 'io': 'ido',
3993 'is': 'isl',
3994 'it': 'ita',
3995 'iu': 'iku',
3996 'ja': 'jpn',
3997 'jv': 'jav',
3998 'ka': 'kat',
3999 'kg': 'kon',
4000 'ki': 'kik',
4001 'kj': 'kua',
4002 'kk': 'kaz',
4003 'kl': 'kal',
4004 'km': 'khm',
4005 'kn': 'kan',
4006 'ko': 'kor',
4007 'kr': 'kau',
4008 'ks': 'kas',
4009 'ku': 'kur',
4010 'kv': 'kom',
4011 'kw': 'cor',
4012 'ky': 'kir',
4013 'la': 'lat',
4014 'lb': 'ltz',
4015 'lg': 'lug',
4016 'li': 'lim',
4017 'ln': 'lin',
4018 'lo': 'lao',
4019 'lt': 'lit',
4020 'lu': 'lub',
4021 'lv': 'lav',
4022 'mg': 'mlg',
4023 'mh': 'mah',
4024 'mi': 'mri',
4025 'mk': 'mkd',
4026 'ml': 'mal',
4027 'mn': 'mon',
4028 'mr': 'mar',
4029 'ms': 'msa',
4030 'mt': 'mlt',
4031 'my': 'mya',
4032 'na': 'nau',
4033 'nb': 'nob',
4034 'nd': 'nde',
4035 'ne': 'nep',
4036 'ng': 'ndo',
4037 'nl': 'nld',
4038 'nn': 'nno',
4039 'no': 'nor',
4040 'nr': 'nbl',
4041 'nv': 'nav',
4042 'ny': 'nya',
4043 'oc': 'oci',
4044 'oj': 'oji',
4045 'om': 'orm',
4046 'or': 'ori',
4047 'os': 'oss',
4048 'pa': 'pan',
4049 'pi': 'pli',
4050 'pl': 'pol',
4051 'ps': 'pus',
4052 'pt': 'por',
4053 'qu': 'que',
4054 'rm': 'roh',
4055 'rn': 'run',
4056 'ro': 'ron',
4057 'ru': 'rus',
4058 'rw': 'kin',
4059 'sa': 'san',
4060 'sc': 'srd',
4061 'sd': 'snd',
4062 'se': 'sme',
4063 'sg': 'sag',
4064 'si': 'sin',
4065 'sk': 'slk',
4066 'sl': 'slv',
4067 'sm': 'smo',
4068 'sn': 'sna',
4069 'so': 'som',
4070 'sq': 'sqi',
4071 'sr': 'srp',
4072 'ss': 'ssw',
4073 'st': 'sot',
4074 'su': 'sun',
4075 'sv': 'swe',
4076 'sw': 'swa',
4077 'ta': 'tam',
4078 'te': 'tel',
4079 'tg': 'tgk',
4080 'th': 'tha',
4081 'ti': 'tir',
4082 'tk': 'tuk',
4083 'tl': 'tgl',
4084 'tn': 'tsn',
4085 'to': 'ton',
4086 'tr': 'tur',
4087 'ts': 'tso',
4088 'tt': 'tat',
4089 'tw': 'twi',
4090 'ty': 'tah',
4091 'ug': 'uig',
4092 'uk': 'ukr',
4093 'ur': 'urd',
4094 'uz': 'uzb',
4095 've': 'ven',
4096 'vi': 'vie',
4097 'vo': 'vol',
4098 'wa': 'wln',
4099 'wo': 'wol',
4100 'xh': 'xho',
4101 'yi': 'yid',
4102 'ji': 'yid', # Replaced by yi in 1989 revision
4103 'yo': 'yor',
4104 'za': 'zha',
4105 'zh': 'zho',
4106 'zu': 'zul',
4107 }
4108
4109 @classmethod
4110 def short2long(cls, code):
4111 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4112 return cls._lang_map.get(code[:2])
4113
4114 @classmethod
4115 def long2short(cls, code):
4116 """Convert language code from ISO 639-2/T to ISO 639-1"""
4117 for short_name, long_name in cls._lang_map.items():
4118 if long_name == code:
4119 return short_name
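# Illustrative usage (editor's sketch):
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('fra')
#   'fr'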
4120
4121
4122 class ISO3166Utils:
4123 # From http://data.okfn.org/data/core/country-list
4124 _country_map = {
4125 'AF': 'Afghanistan',
4126 'AX': 'Åland Islands',
4127 'AL': 'Albania',
4128 'DZ': 'Algeria',
4129 'AS': 'American Samoa',
4130 'AD': 'Andorra',
4131 'AO': 'Angola',
4132 'AI': 'Anguilla',
4133 'AQ': 'Antarctica',
4134 'AG': 'Antigua and Barbuda',
4135 'AR': 'Argentina',
4136 'AM': 'Armenia',
4137 'AW': 'Aruba',
4138 'AU': 'Australia',
4139 'AT': 'Austria',
4140 'AZ': 'Azerbaijan',
4141 'BS': 'Bahamas',
4142 'BH': 'Bahrain',
4143 'BD': 'Bangladesh',
4144 'BB': 'Barbados',
4145 'BY': 'Belarus',
4146 'BE': 'Belgium',
4147 'BZ': 'Belize',
4148 'BJ': 'Benin',
4149 'BM': 'Bermuda',
4150 'BT': 'Bhutan',
4151 'BO': 'Bolivia, Plurinational State of',
4152 'BQ': 'Bonaire, Sint Eustatius and Saba',
4153 'BA': 'Bosnia and Herzegovina',
4154 'BW': 'Botswana',
4155 'BV': 'Bouvet Island',
4156 'BR': 'Brazil',
4157 'IO': 'British Indian Ocean Territory',
4158 'BN': 'Brunei Darussalam',
4159 'BG': 'Bulgaria',
4160 'BF': 'Burkina Faso',
4161 'BI': 'Burundi',
4162 'KH': 'Cambodia',
4163 'CM': 'Cameroon',
4164 'CA': 'Canada',
4165 'CV': 'Cape Verde',
4166 'KY': 'Cayman Islands',
4167 'CF': 'Central African Republic',
4168 'TD': 'Chad',
4169 'CL': 'Chile',
4170 'CN': 'China',
4171 'CX': 'Christmas Island',
4172 'CC': 'Cocos (Keeling) Islands',
4173 'CO': 'Colombia',
4174 'KM': 'Comoros',
4175 'CG': 'Congo',
4176 'CD': 'Congo, the Democratic Republic of the',
4177 'CK': 'Cook Islands',
4178 'CR': 'Costa Rica',
4179 'CI': 'Côte d\'Ivoire',
4180 'HR': 'Croatia',
4181 'CU': 'Cuba',
4182 'CW': 'Curaçao',
4183 'CY': 'Cyprus',
4184 'CZ': 'Czech Republic',
4185 'DK': 'Denmark',
4186 'DJ': 'Djibouti',
4187 'DM': 'Dominica',
4188 'DO': 'Dominican Republic',
4189 'EC': 'Ecuador',
4190 'EG': 'Egypt',
4191 'SV': 'El Salvador',
4192 'GQ': 'Equatorial Guinea',
4193 'ER': 'Eritrea',
4194 'EE': 'Estonia',
4195 'ET': 'Ethiopia',
4196 'FK': 'Falkland Islands (Malvinas)',
4197 'FO': 'Faroe Islands',
4198 'FJ': 'Fiji',
4199 'FI': 'Finland',
4200 'FR': 'France',
4201 'GF': 'French Guiana',
4202 'PF': 'French Polynesia',
4203 'TF': 'French Southern Territories',
4204 'GA': 'Gabon',
4205 'GM': 'Gambia',
4206 'GE': 'Georgia',
4207 'DE': 'Germany',
4208 'GH': 'Ghana',
4209 'GI': 'Gibraltar',
4210 'GR': 'Greece',
4211 'GL': 'Greenland',
4212 'GD': 'Grenada',
4213 'GP': 'Guadeloupe',
4214 'GU': 'Guam',
4215 'GT': 'Guatemala',
4216 'GG': 'Guernsey',
4217 'GN': 'Guinea',
4218 'GW': 'Guinea-Bissau',
4219 'GY': 'Guyana',
4220 'HT': 'Haiti',
4221 'HM': 'Heard Island and McDonald Islands',
4222 'VA': 'Holy See (Vatican City State)',
4223 'HN': 'Honduras',
4224 'HK': 'Hong Kong',
4225 'HU': 'Hungary',
4226 'IS': 'Iceland',
4227 'IN': 'India',
4228 'ID': 'Indonesia',
4229 'IR': 'Iran, Islamic Republic of',
4230 'IQ': 'Iraq',
4231 'IE': 'Ireland',
4232 'IM': 'Isle of Man',
4233 'IL': 'Israel',
4234 'IT': 'Italy',
4235 'JM': 'Jamaica',
4236 'JP': 'Japan',
4237 'JE': 'Jersey',
4238 'JO': 'Jordan',
4239 'KZ': 'Kazakhstan',
4240 'KE': 'Kenya',
4241 'KI': 'Kiribati',
4242 'KP': 'Korea, Democratic People\'s Republic of',
4243 'KR': 'Korea, Republic of',
4244 'KW': 'Kuwait',
4245 'KG': 'Kyrgyzstan',
4246 'LA': 'Lao People\'s Democratic Republic',
4247 'LV': 'Latvia',
4248 'LB': 'Lebanon',
4249 'LS': 'Lesotho',
4250 'LR': 'Liberia',
4251 'LY': 'Libya',
4252 'LI': 'Liechtenstein',
4253 'LT': 'Lithuania',
4254 'LU': 'Luxembourg',
4255 'MO': 'Macao',
4256 'MK': 'Macedonia, the Former Yugoslav Republic of',
4257 'MG': 'Madagascar',
4258 'MW': 'Malawi',
4259 'MY': 'Malaysia',
4260 'MV': 'Maldives',
4261 'ML': 'Mali',
4262 'MT': 'Malta',
4263 'MH': 'Marshall Islands',
4264 'MQ': 'Martinique',
4265 'MR': 'Mauritania',
4266 'MU': 'Mauritius',
4267 'YT': 'Mayotte',
4268 'MX': 'Mexico',
4269 'FM': 'Micronesia, Federated States of',
4270 'MD': 'Moldova, Republic of',
4271 'MC': 'Monaco',
4272 'MN': 'Mongolia',
4273 'ME': 'Montenegro',
4274 'MS': 'Montserrat',
4275 'MA': 'Morocco',
4276 'MZ': 'Mozambique',
4277 'MM': 'Myanmar',
4278 'NA': 'Namibia',
4279 'NR': 'Nauru',
4280 'NP': 'Nepal',
4281 'NL': 'Netherlands',
4282 'NC': 'New Caledonia',
4283 'NZ': 'New Zealand',
4284 'NI': 'Nicaragua',
4285 'NE': 'Niger',
4286 'NG': 'Nigeria',
4287 'NU': 'Niue',
4288 'NF': 'Norfolk Island',
4289 'MP': 'Northern Mariana Islands',
4290 'NO': 'Norway',
4291 'OM': 'Oman',
4292 'PK': 'Pakistan',
4293 'PW': 'Palau',
4294 'PS': 'Palestine, State of',
4295 'PA': 'Panama',
4296 'PG': 'Papua New Guinea',
4297 'PY': 'Paraguay',
4298 'PE': 'Peru',
4299 'PH': 'Philippines',
4300 'PN': 'Pitcairn',
4301 'PL': 'Poland',
4302 'PT': 'Portugal',
4303 'PR': 'Puerto Rico',
4304 'QA': 'Qatar',
4305 'RE': 'Réunion',
4306 'RO': 'Romania',
4307 'RU': 'Russian Federation',
4308 'RW': 'Rwanda',
4309 'BL': 'Saint Barthélemy',
4310 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4311 'KN': 'Saint Kitts and Nevis',
4312 'LC': 'Saint Lucia',
4313 'MF': 'Saint Martin (French part)',
4314 'PM': 'Saint Pierre and Miquelon',
4315 'VC': 'Saint Vincent and the Grenadines',
4316 'WS': 'Samoa',
4317 'SM': 'San Marino',
4318 'ST': 'Sao Tome and Principe',
4319 'SA': 'Saudi Arabia',
4320 'SN': 'Senegal',
4321 'RS': 'Serbia',
4322 'SC': 'Seychelles',
4323 'SL': 'Sierra Leone',
4324 'SG': 'Singapore',
4325 'SX': 'Sint Maarten (Dutch part)',
4326 'SK': 'Slovakia',
4327 'SI': 'Slovenia',
4328 'SB': 'Solomon Islands',
4329 'SO': 'Somalia',
4330 'ZA': 'South Africa',
4331 'GS': 'South Georgia and the South Sandwich Islands',
4332 'SS': 'South Sudan',
4333 'ES': 'Spain',
4334 'LK': 'Sri Lanka',
4335 'SD': 'Sudan',
4336 'SR': 'Suriname',
4337 'SJ': 'Svalbard and Jan Mayen',
4338 'SZ': 'Swaziland',
4339 'SE': 'Sweden',
4340 'CH': 'Switzerland',
4341 'SY': 'Syrian Arab Republic',
4342 'TW': 'Taiwan, Province of China',
4343 'TJ': 'Tajikistan',
4344 'TZ': 'Tanzania, United Republic of',
4345 'TH': 'Thailand',
4346 'TL': 'Timor-Leste',
4347 'TG': 'Togo',
4348 'TK': 'Tokelau',
4349 'TO': 'Tonga',
4350 'TT': 'Trinidad and Tobago',
4351 'TN': 'Tunisia',
4352 'TR': 'Turkey',
4353 'TM': 'Turkmenistan',
4354 'TC': 'Turks and Caicos Islands',
4355 'TV': 'Tuvalu',
4356 'UG': 'Uganda',
4357 'UA': 'Ukraine',
4358 'AE': 'United Arab Emirates',
4359 'GB': 'United Kingdom',
4360 'US': 'United States',
4361 'UM': 'United States Minor Outlying Islands',
4362 'UY': 'Uruguay',
4363 'UZ': 'Uzbekistan',
4364 'VU': 'Vanuatu',
4365 'VE': 'Venezuela, Bolivarian Republic of',
4366 'VN': 'Viet Nam',
4367 'VG': 'Virgin Islands, British',
4368 'VI': 'Virgin Islands, U.S.',
4369 'WF': 'Wallis and Futuna',
4370 'EH': 'Western Sahara',
4371 'YE': 'Yemen',
4372 'ZM': 'Zambia',
4373 'ZW': 'Zimbabwe',
4374 # Not ISO 3166 codes, but used for IP blocks
4375 'AP': 'Asia/Pacific Region',
4376 'EU': 'Europe',
4377 }
4378
4379 @classmethod
4380 def short2full(cls, code):
4381 """Convert an ISO 3166-2 country code to the corresponding full name"""
4382 return cls._country_map.get(code.upper())
4383
4384
4385 class GeoUtils:
4386 # Major IPv4 address blocks per country
4387 _country_ip_map = {
4388 'AD': '46.172.224.0/19',
4389 'AE': '94.200.0.0/13',
4390 'AF': '149.54.0.0/17',
4391 'AG': '209.59.64.0/18',
4392 'AI': '204.14.248.0/21',
4393 'AL': '46.99.0.0/16',
4394 'AM': '46.70.0.0/15',
4395 'AO': '105.168.0.0/13',
4396 'AP': '182.50.184.0/21',
4397 'AQ': '23.154.160.0/24',
4398 'AR': '181.0.0.0/12',
4399 'AS': '202.70.112.0/20',
4400 'AT': '77.116.0.0/14',
4401 'AU': '1.128.0.0/11',
4402 'AW': '181.41.0.0/18',
4403 'AX': '185.217.4.0/22',
4404 'AZ': '5.197.0.0/16',
4405 'BA': '31.176.128.0/17',
4406 'BB': '65.48.128.0/17',
4407 'BD': '114.130.0.0/16',
4408 'BE': '57.0.0.0/8',
4409 'BF': '102.178.0.0/15',
4410 'BG': '95.42.0.0/15',
4411 'BH': '37.131.0.0/17',
4412 'BI': '154.117.192.0/18',
4413 'BJ': '137.255.0.0/16',
4414 'BL': '185.212.72.0/23',
4415 'BM': '196.12.64.0/18',
4416 'BN': '156.31.0.0/16',
4417 'BO': '161.56.0.0/16',
4418 'BQ': '161.0.80.0/20',
4419 'BR': '191.128.0.0/12',
4420 'BS': '24.51.64.0/18',
4421 'BT': '119.2.96.0/19',
4422 'BW': '168.167.0.0/16',
4423 'BY': '178.120.0.0/13',
4424 'BZ': '179.42.192.0/18',
4425 'CA': '99.224.0.0/11',
4426 'CD': '41.243.0.0/16',
4427 'CF': '197.242.176.0/21',
4428 'CG': '160.113.0.0/16',
4429 'CH': '85.0.0.0/13',
4430 'CI': '102.136.0.0/14',
4431 'CK': '202.65.32.0/19',
4432 'CL': '152.172.0.0/14',
4433 'CM': '102.244.0.0/14',
4434 'CN': '36.128.0.0/10',
4435 'CO': '181.240.0.0/12',
4436 'CR': '201.192.0.0/12',
4437 'CU': '152.206.0.0/15',
4438 'CV': '165.90.96.0/19',
4439 'CW': '190.88.128.0/17',
4440 'CY': '31.153.0.0/16',
4441 'CZ': '88.100.0.0/14',
4442 'DE': '53.0.0.0/8',
4443 'DJ': '197.241.0.0/17',
4444 'DK': '87.48.0.0/12',
4445 'DM': '192.243.48.0/20',
4446 'DO': '152.166.0.0/15',
4447 'DZ': '41.96.0.0/12',
4448 'EC': '186.68.0.0/15',
4449 'EE': '90.190.0.0/15',
4450 'EG': '156.160.0.0/11',
4451 'ER': '196.200.96.0/20',
4452 'ES': '88.0.0.0/11',
4453 'ET': '196.188.0.0/14',
4454 'EU': '2.16.0.0/13',
4455 'FI': '91.152.0.0/13',
4456 'FJ': '144.120.0.0/16',
4457 'FK': '80.73.208.0/21',
4458 'FM': '119.252.112.0/20',
4459 'FO': '88.85.32.0/19',
4460 'FR': '90.0.0.0/9',
4461 'GA': '41.158.0.0/15',
4462 'GB': '25.0.0.0/8',
4463 'GD': '74.122.88.0/21',
4464 'GE': '31.146.0.0/16',
4465 'GF': '161.22.64.0/18',
4466 'GG': '62.68.160.0/19',
4467 'GH': '154.160.0.0/12',
4468 'GI': '95.164.0.0/16',
4469 'GL': '88.83.0.0/19',
4470 'GM': '160.182.0.0/15',
4471 'GN': '197.149.192.0/18',
4472 'GP': '104.250.0.0/19',
4473 'GQ': '105.235.224.0/20',
4474 'GR': '94.64.0.0/13',
4475 'GT': '168.234.0.0/16',
4476 'GU': '168.123.0.0/16',
4477 'GW': '197.214.80.0/20',
4478 'GY': '181.41.64.0/18',
4479 'HK': '113.252.0.0/14',
4480 'HN': '181.210.0.0/16',
4481 'HR': '93.136.0.0/13',
4482 'HT': '148.102.128.0/17',
4483 'HU': '84.0.0.0/14',
4484 'ID': '39.192.0.0/10',
4485 'IE': '87.32.0.0/12',
4486 'IL': '79.176.0.0/13',
4487 'IM': '5.62.80.0/20',
4488 'IN': '117.192.0.0/10',
4489 'IO': '203.83.48.0/21',
4490 'IQ': '37.236.0.0/14',
4491 'IR': '2.176.0.0/12',
4492 'IS': '82.221.0.0/16',
4493 'IT': '79.0.0.0/10',
4494 'JE': '87.244.64.0/18',
4495 'JM': '72.27.0.0/17',
4496 'JO': '176.29.0.0/16',
4497 'JP': '133.0.0.0/8',
4498 'KE': '105.48.0.0/12',
4499 'KG': '158.181.128.0/17',
4500 'KH': '36.37.128.0/17',
4501 'KI': '103.25.140.0/22',
4502 'KM': '197.255.224.0/20',
4503 'KN': '198.167.192.0/19',
4504 'KP': '175.45.176.0/22',
4505 'KR': '175.192.0.0/10',
4506 'KW': '37.36.0.0/14',
4507 'KY': '64.96.0.0/15',
4508 'KZ': '2.72.0.0/13',
4509 'LA': '115.84.64.0/18',
4510 'LB': '178.135.0.0/16',
4511 'LC': '24.92.144.0/20',
4512 'LI': '82.117.0.0/19',
4513 'LK': '112.134.0.0/15',
4514 'LR': '102.183.0.0/16',
4515 'LS': '129.232.0.0/17',
4516 'LT': '78.56.0.0/13',
4517 'LU': '188.42.0.0/16',
4518 'LV': '46.109.0.0/16',
4519 'LY': '41.252.0.0/14',
4520 'MA': '105.128.0.0/11',
4521 'MC': '88.209.64.0/18',
4522 'MD': '37.246.0.0/16',
4523 'ME': '178.175.0.0/17',
4524 'MF': '74.112.232.0/21',
4525 'MG': '154.126.0.0/17',
4526 'MH': '117.103.88.0/21',
4527 'MK': '77.28.0.0/15',
4528 'ML': '154.118.128.0/18',
4529 'MM': '37.111.0.0/17',
4530 'MN': '49.0.128.0/17',
4531 'MO': '60.246.0.0/16',
4532 'MP': '202.88.64.0/20',
4533 'MQ': '109.203.224.0/19',
4534 'MR': '41.188.64.0/18',
4535 'MS': '208.90.112.0/22',
4536 'MT': '46.11.0.0/16',
4537 'MU': '105.16.0.0/12',
4538 'MV': '27.114.128.0/18',
4539 'MW': '102.70.0.0/15',
4540 'MX': '187.192.0.0/11',
4541 'MY': '175.136.0.0/13',
4542 'MZ': '197.218.0.0/15',
4543 'NA': '41.182.0.0/16',
4544 'NC': '101.101.0.0/18',
4545 'NE': '197.214.0.0/18',
4546 'NF': '203.17.240.0/22',
4547 'NG': '105.112.0.0/12',
4548 'NI': '186.76.0.0/15',
4549 'NL': '145.96.0.0/11',
4550 'NO': '84.208.0.0/13',
4551 'NP': '36.252.0.0/15',
4552 'NR': '203.98.224.0/19',
4553 'NU': '49.156.48.0/22',
4554 'NZ': '49.224.0.0/14',
4555 'OM': '5.36.0.0/15',
4556 'PA': '186.72.0.0/15',
4557 'PE': '186.160.0.0/14',
4558 'PF': '123.50.64.0/18',
4559 'PG': '124.240.192.0/19',
4560 'PH': '49.144.0.0/13',
4561 'PK': '39.32.0.0/11',
4562 'PL': '83.0.0.0/11',
4563 'PM': '70.36.0.0/20',
4564 'PR': '66.50.0.0/16',
4565 'PS': '188.161.0.0/16',
4566 'PT': '85.240.0.0/13',
4567 'PW': '202.124.224.0/20',
4568 'PY': '181.120.0.0/14',
4569 'QA': '37.210.0.0/15',
4570 'RE': '102.35.0.0/16',
4571 'RO': '79.112.0.0/13',
4572 'RS': '93.86.0.0/15',
4573 'RU': '5.136.0.0/13',
4574 'RW': '41.186.0.0/16',
4575 'SA': '188.48.0.0/13',
4576 'SB': '202.1.160.0/19',
4577 'SC': '154.192.0.0/11',
4578 'SD': '102.120.0.0/13',
4579 'SE': '78.64.0.0/12',
4580 'SG': '8.128.0.0/10',
4581 'SI': '188.196.0.0/14',
4582 'SK': '78.98.0.0/15',
4583 'SL': '102.143.0.0/17',
4584 'SM': '89.186.32.0/19',
4585 'SN': '41.82.0.0/15',
4586 'SO': '154.115.192.0/18',
4587 'SR': '186.179.128.0/17',
4588 'SS': '105.235.208.0/21',
4589 'ST': '197.159.160.0/19',
4590 'SV': '168.243.0.0/16',
4591 'SX': '190.102.0.0/20',
4592 'SY': '5.0.0.0/16',
4593 'SZ': '41.84.224.0/19',
4594 'TC': '65.255.48.0/20',
4595 'TD': '154.68.128.0/19',
4596 'TG': '196.168.0.0/14',
4597 'TH': '171.96.0.0/13',
4598 'TJ': '85.9.128.0/18',
4599 'TK': '27.96.24.0/21',
4600 'TL': '180.189.160.0/20',
4601 'TM': '95.85.96.0/19',
4602 'TN': '197.0.0.0/11',
4603 'TO': '175.176.144.0/21',
4604 'TR': '78.160.0.0/11',
4605 'TT': '186.44.0.0/15',
4606 'TV': '202.2.96.0/19',
4607 'TW': '120.96.0.0/11',
4608 'TZ': '156.156.0.0/14',
4609 'UA': '37.52.0.0/14',
4610 'UG': '102.80.0.0/13',
4611 'US': '6.0.0.0/8',
4612 'UY': '167.56.0.0/13',
4613 'UZ': '84.54.64.0/18',
4614 'VA': '212.77.0.0/19',
4615 'VC': '207.191.240.0/21',
4616 'VE': '186.88.0.0/13',
4617 'VG': '66.81.192.0/20',
4618 'VI': '146.226.0.0/16',
4619 'VN': '14.160.0.0/11',
4620 'VU': '202.80.32.0/20',
4621 'WF': '117.20.32.0/21',
4622 'WS': '202.4.32.0/19',
4623 'YE': '134.35.0.0/16',
4624 'YT': '41.242.116.0/22',
4625 'ZA': '41.0.0.0/11',
4626 'ZM': '102.144.0.0/13',
4627 'ZW': '102.177.192.0/18',
4628 }
4629
4630 @classmethod
4631 def random_ipv4(cls, code_or_block):
4632 if len(code_or_block) == 2:
4633 block = cls._country_ip_map.get(code_or_block.upper())
4634 if not block:
4635 return None
4636 else:
4637 block = code_or_block
4638 addr, preflen = block.split('/')
4639 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4640 addr_max = addr_min | (0xffffffff >> int(preflen))
4641 return str(socket.inet_ntoa(
4642 struct.pack('!L', random.randint(addr_min, addr_max))))
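# Illustrative usage (editor's sketch): accepts either a two-letter country code or an
# explicit CIDR block, and returns a random address inside the block, e.g.
#   GeoUtils.random_ipv4('DE')         -> some address in 53.0.0.0/8
#   GeoUtils.random_ipv4('10.0.0.0/8') -> some address in 10.0.0.0/8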
4643
4644
4645 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4646 def __init__(self, proxies=None):
4647 # Set default handlers
4648 for type in ('http', 'https'):
4649 setattr(self, '%s_open' % type,
4650 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4651 meth(r, proxy, type))
4652 urllib.request.ProxyHandler.__init__(self, proxies)
4653
4654 def proxy_open(self, req, proxy, type):
4655 req_proxy = req.headers.get('Ytdl-request-proxy')
4656 if req_proxy is not None:
4657 proxy = req_proxy
4658 del req.headers['Ytdl-request-proxy']
4659
4660 if proxy == '__noproxy__':
4661 return None # No Proxy
4662 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4663 req.add_header('Ytdl-socks-proxy', proxy)
4664             # yt-dlp's http/https handlers wrap the socket with socks
4665 return None
4666 return urllib.request.ProxyHandler.proxy_open(
4667 self, req, proxy, type)
4668
4669
4670 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4671 # released into Public Domain
4672 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4673
4674 def long_to_bytes(n, blocksize=0):
4675 """long_to_bytes(n:long, blocksize:int) : string
4676 Convert a long integer to a byte string.
4677
4678 If optional blocksize is given and greater than zero, pad the front of the
4679 byte string with binary zeros so that the length is a multiple of
4680 blocksize.
4681 """
4682 # after much testing, this algorithm was deemed to be the fastest
4683 s = b''
4684 n = int(n)
4685 while n > 0:
4686 s = struct.pack('>I', n & 0xffffffff) + s
4687 n = n >> 32
4688 # strip off leading zeros
4689 for i in range(len(s)):
4690 if s[i] != b'\000'[0]:
4691 break
4692 else:
4693 # only happens when n == 0
4694 s = b'\000'
4695 i = 0
4696 s = s[i:]
4697 # add back some pad bytes. this could be done more efficiently w.r.t. the
4698 # de-padding being done above, but sigh...
4699 if blocksize > 0 and len(s) % blocksize:
4700 s = (blocksize - len(s) % blocksize) * b'\000' + s
4701 return s
4702
4703
4704 def bytes_to_long(s):
4705 """bytes_to_long(string) : long
4706 Convert a byte string to a long integer.
4707
4708 This is (essentially) the inverse of long_to_bytes().
4709 """
4710 acc = 0
4711 length = len(s)
4712 if length % 4:
4713 extra = (4 - length % 4)
4714 s = b'\000' * extra + s
4715 length = length + extra
4716 for i in range(0, length, 4):
4717 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4718 return acc
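# Illustrative usage (editor's sketch):
#   >>> long_to_bytes(256)
#   b'\x01\x00'
#   >>> long_to_bytes(1, blocksize=4)
#   b'\x00\x00\x00\x01'
#   >>> bytes_to_long(b'\x01\x00')
#   256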
4719
4720
4721 def ohdave_rsa_encrypt(data, exponent, modulus):
4722 '''
4723 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4724
4725 Input:
4726 data: data to encrypt, bytes-like object
4727 exponent, modulus: parameter e and N of RSA algorithm, both integer
4728 Output: hex string of encrypted data
4729
4730 Limitation: supports one block encryption only
4731 '''
4732
4733 payload = int(binascii.hexlify(data[::-1]), 16)
4734 encrypted = pow(payload, exponent, modulus)
4735 return '%x' % encrypted
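# Illustrative usage with tiny toy parameters (editor's sketch; real keys are far larger):
#   >>> ohdave_rsa_encrypt(b'\x02', 3, 101)  # pow(2, 3, 101) == 8
#   '8'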
4736
4737
4738 def pkcs1pad(data, length):
4739 """
4740 Padding input data with PKCS#1 scheme
4741
4742 @param {int[]} data input data
4743 @param {int} length target length
4744 @returns {int[]} padded data
4745 """
4746 if len(data) > length - 11:
4747 raise ValueError('Input data too long for PKCS#1 padding')
4748
4749     pseudo_random = [random.randint(1, 254) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 requires non-zero padding bytes
4750 return [0, 2] + pseudo_random + [0] + data
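# Illustrative usage (editor's sketch): pad a 2-byte message to a 16-byte block:
#   >>> padded = pkcs1pad([0x01, 0x02], 16)
#   >>> len(padded), padded[:2], padded[-3]
#   (16, [0, 2], 0)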
4751
4752
4753 def _base_n_table(n, table):
4754 if not table and not n:
4755 raise ValueError('Either table or n must be specified')
4756 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4757
4758     if n and n != len(table):  # skip the check when only a table is given and n is inferred
4759 raise ValueError(f'base {n} exceeds table length {len(table)}')
4760 return table
4761
4762
4763 def encode_base_n(num, n=None, table=None):
4764 """Convert given int to a base-n string"""
4765 table = _base_n_table(n, table)
4766 if not num:
4767 return table[0]
4768
4769 result, base = '', len(table)
4770 while num:
4771 result = table[num % base] + result
4772 num = num // base
4773 return result
4774
4775
4776 def decode_base_n(string, n=None, table=None):
4777 """Convert given base-n string to int"""
4778 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4779 result, base = 0, len(table)
4780 for char in string:
4781 result = result * base + table[char]
4782 return result
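# Illustrative usage (editor's sketch):
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> decode_base_n('ff', 16)
#   255
#   >>> encode_base_n(5, table='01')  # custom alphabet; the base is inferred from the table
#   '101'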
4783
4784
4785 def decode_base(value, digits):
4786 write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
4787 'and may be removed in a future version. Use yt_dlp.decode_base_n instead')
4788 return decode_base_n(value, table=digits)
4789
4790
4791 def decode_packed_codes(code):
4792 mobj = re.search(PACKED_CODES_RE, code)
4793 obfuscated_code, base, count, symbols = mobj.groups()
4794 base = int(base)
4795 count = int(count)
4796 symbols = symbols.split('|')
4797 symbol_table = {}
4798
4799 while count:
4800 count -= 1
4801 base_n_count = encode_base_n(count, base)
4802 symbol_table[base_n_count] = symbols[count] or base_n_count
4803
4804 return re.sub(
4805 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4806 obfuscated_code)
4807
4808
4809 def caesar(s, alphabet, shift):
4810 if shift == 0:
4811 return s
4812 l = len(alphabet)
4813 return ''.join(
4814 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4815 for c in s)
4816
4817
4818 def rot47(s):
4819 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
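# Illustrative usage (editor's sketch): rot47 shifts within the printable ASCII range
# and is its own inverse:
#   >>> caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 2)
#   'cde'
#   >>> rot47('foo')
#   '7@@'
#   >>> rot47('7@@')
#   'foo'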
4820
4821
4822 def parse_m3u8_attributes(attrib):
4823 info = {}
4824 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4825 if val.startswith('"'):
4826 val = val[1:-1]
4827 info[key] = val
4828 return info
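# Illustrative usage (editor's sketch): quoted values may contain commas:
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}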
4829
4830
4831 def urshift(val, n):
4832 return val >> n if val >= 0 else (val + 0x100000000) >> n
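# Illustrative usage (editor's sketch): emulates a 32-bit unsigned right shift,
# where Python's native >> would keep the sign:
#   >>> urshift(-1, 28)
#   15
#   >>> -1 >> 28
#   -1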
4833
4834
4835 # Based on png2str() written by @gdkchan and improved by @yokrysty
4836 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4837 def decode_png(png_data):
4838 # Reference: https://www.w3.org/TR/PNG/
4839 header = png_data[8:]
4840
4841 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4842 raise OSError('Not a valid PNG file.')
4843
4844 int_map = {1: '>B', 2: '>H', 4: '>I'}
4845 unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
4846
4847 chunks = []
4848
4849 while header:
4850 length = unpack_integer(header[:4])
4851 header = header[4:]
4852
4853 chunk_type = header[:4]
4854 header = header[4:]
4855
4856 chunk_data = header[:length]
4857 header = header[length:]
4858
4859 header = header[4:] # Skip CRC
4860
4861 chunks.append({
4862 'type': chunk_type,
4863 'length': length,
4864 'data': chunk_data
4865 })
4866
4867 ihdr = chunks[0]['data']
4868
4869 width = unpack_integer(ihdr[:4])
4870 height = unpack_integer(ihdr[4:8])
4871
4872 idat = b''
4873
4874 for chunk in chunks:
4875 if chunk['type'] == b'IDAT':
4876 idat += chunk['data']
4877
4878 if not idat:
4879 raise OSError('Unable to read PNG data.')
4880
4881 decompressed_data = bytearray(zlib.decompress(idat))
4882
4883 stride = width * 3
4884 pixels = []
4885
4886 def _get_pixel(idx):
4887 x = idx % stride
4888 y = idx // stride
4889 return pixels[y][x]
4890
4891 for y in range(height):
4892 basePos = y * (1 + stride)
4893 filter_type = decompressed_data[basePos]
4894
4895 current_row = []
4896
4897 pixels.append(current_row)
4898
4899 for x in range(stride):
4900 color = decompressed_data[1 + basePos + x]
4901 basex = y * stride + x
4902 left = 0
4903 up = 0
4904
4905 if x > 2:
4906 left = _get_pixel(basex - 3)
4907 if y > 0:
4908 up = _get_pixel(basex - stride)
4909
4910 if filter_type == 1: # Sub
4911 color = (color + left) & 0xff
4912 elif filter_type == 2: # Up
4913 color = (color + up) & 0xff
4914 elif filter_type == 3: # Average
4915 color = (color + ((left + up) >> 1)) & 0xff
4916 elif filter_type == 4: # Paeth
4917 a = left
4918 b = up
4919 c = 0
4920
4921 if x > 2 and y > 0:
4922 c = _get_pixel(basex - stride - 3)
4923
4924 p = a + b - c
4925
4926 pa = abs(p - a)
4927 pb = abs(p - b)
4928 pc = abs(p - c)
4929
4930 if pa <= pb and pa <= pc:
4931 color = (color + a) & 0xff
4932 elif pb <= pc:
4933 color = (color + b) & 0xff
4934 else:
4935 color = (color + c) & 0xff
4936
4937 current_row.append(color)
4938
4939 return width, height, pixels
4940
4941
4942 def write_xattr(path, key, value):
4943 # Windows: Write xattrs to NTFS Alternate Data Streams:
4944 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4945 if compat_os_name == 'nt':
4946 assert ':' not in key
4947 assert os.path.exists(path)
4948
4949 try:
4950 with open(f'{path}:{key}', 'wb') as f:
4951 f.write(value)
4952 except OSError as e:
4953 raise XAttrMetadataError(e.errno, e.strerror)
4954 return
4955
4956 # UNIX Method 1. Use xattrs/pyxattrs modules
4957
4958 setxattr = None
4959 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
4960 # Unicode arguments are not supported in pyxattr until version 0.5.0
4961 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4962 if version_tuple(xattr.__version__) >= (0, 5, 0):
4963 setxattr = xattr.set
4964 elif xattr:
4965 setxattr = xattr.setxattr
4966
4967 if setxattr:
4968 try:
4969 setxattr(path, key, value)
4970 except OSError as e:
4971 raise XAttrMetadataError(e.errno, e.strerror)
4972 return
4973
4974 # UNIX Method 2. Use setfattr/xattr executables
4975 exe = ('setfattr' if check_executable('setfattr', ['--version'])
4976 else 'xattr' if check_executable('xattr', ['-h']) else None)
4977 if not exe:
4978 raise XAttrUnavailableError(
4979 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
4980 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4981
4982 value = value.decode()
4983 try:
4984 _, stderr, returncode = Popen.run(
4985 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
4986 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4987 except OSError as e:
4988 raise XAttrMetadataError(e.errno, e.strerror)
4989 if returncode:
4990 raise XAttrMetadataError(returncode, stderr)
4991
4992
4993 def random_birthday(year_field, month_field, day_field):
4994 start_date = datetime.date(1950, 1, 1)
4995 end_date = datetime.date(1995, 12, 31)
4996 offset = random.randint(0, (end_date - start_date).days)
4997 random_date = start_date + datetime.timedelta(offset)
4998 return {
4999 year_field: str(random_date.year),
5000 month_field: str(random_date.month),
5001 day_field: str(random_date.day),
5002 }
5003
5004
5005 # Templates for internet shortcut files, which are plain text files.
5006 DOT_URL_LINK_TEMPLATE = '''\
5007 [InternetShortcut]
5008 URL=%(url)s
5009 '''
5010
5011 DOT_WEBLOC_LINK_TEMPLATE = '''\
5012 <?xml version="1.0" encoding="UTF-8"?>
5013 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5014 <plist version="1.0">
5015 <dict>
5016 \t<key>URL</key>
5017 \t<string>%(url)s</string>
5018 </dict>
5019 </plist>
5020 '''
5021
5022 DOT_DESKTOP_LINK_TEMPLATE = '''\
5023 [Desktop Entry]
5024 Encoding=UTF-8
5025 Name=%(filename)s
5026 Type=Link
5027 URL=%(url)s
5028 Icon=text-html
5029 '''
5030
5031 LINK_TEMPLATES = {
5032 'url': DOT_URL_LINK_TEMPLATE,
5033 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5034 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5035 }
5036
5037
5038 def iri_to_uri(iri):
5039 """
5040 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5041
5042 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5043 """
5044
5045 iri_parts = urllib.parse.urlparse(iri)
5046
5047 if '[' in iri_parts.netloc:
5048         raise ValueError('IPv6 URIs are not yet supported.')
5049 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5050
5051 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5052
5053 net_location = ''
5054 if iri_parts.username:
5055 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5056 if iri_parts.password is not None:
5057 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5058 net_location += '@'
5059
5060 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5061 # The 'idna' encoding produces ASCII text.
5062 if iri_parts.port is not None and iri_parts.port != 80:
5063 net_location += ':' + str(iri_parts.port)
5064
5065 return urllib.parse.urlunparse(
5066 (iri_parts.scheme,
5067 net_location,
5068
5069 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5070
5071 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5072 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5073
5074 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5075 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5076
5077 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5078
5079 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
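# Illustrative usage (editor's sketch; the IRI is made up): the hostname is
# punycoded and the path percent-encoded as UTF-8:
#   >>> iri_to_uri('https://müller.example/straße')
#   'https://xn--mller-kva.example/stra%C3%9Fe'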
5080
5081
5082 def to_high_limit_path(path):
5083 if sys.platform in ['win32', 'cygwin']:
5084 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5085 return '\\\\?\\' + os.path.abspath(path)
5086
5087 return path
5088
5089
5090 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5091 val = traverse_obj(obj, *variadic(field))
5092 if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5093 return default
5094 return template % func(val)
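# Illustrative usage (editor's sketch):
#   >>> format_field({'height': 1080}, 'height', '%sp')
#   '1080p'
#   >>> format_field({'height': None}, 'height', '%sp', default='?')
#   '?'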
5095
5096
5097 def clean_podcast_url(url):
5098 return re.sub(r'''(?x)
5099 (?:
5100 (?:
5101 chtbl\.com/track|
5102 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5103 play\.podtrac\.com
5104 )/[^/]+|
5105 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5106 flex\.acast\.com|
5107 pd(?:
5108 cn\.co| # https://podcorn.com/analytics-prefix/
5109 st\.fm # https://podsights.com/docs/
5110 )/e
5111 )/''', '', url)
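# Illustrative usage (editor's sketch; the URL is made up): analytics prefixes are stripped:
#   >>> clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/example.com/ep.mp3')
#   'https://example.com/ep.mp3'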
5112
5113
5114 _HEX_TABLE = '0123456789abcdef'
5115
5116
5117 def random_uuidv4():
5118 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5119
5120
5121 def make_dir(path, to_screen=None):
5122 try:
5123 dn = os.path.dirname(path)
5124 if dn and not os.path.exists(dn):
5125 os.makedirs(dn)
5126 return True
5127 except OSError as err:
5128         if callable(to_screen):
5129 to_screen('unable to create directory ' + error_to_compat_str(err))
5130 return False
5131
5132
5133 def get_executable_path():
5134 from .update import _get_variant_and_executable_path
5135
5136 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5137
5138
5139 def load_plugins(name, suffix, namespace):
5140 classes = {}
5141 with contextlib.suppress(FileNotFoundError):
5142 plugins_spec = importlib.util.spec_from_file_location(
5143 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5144 plugins = importlib.util.module_from_spec(plugins_spec)
5145 sys.modules[plugins_spec.name] = plugins
5146 plugins_spec.loader.exec_module(plugins)
5147 for name in dir(plugins):
5148 if name in namespace:
5149 continue
5150 if not name.endswith(suffix):
5151 continue
5152 klass = getattr(plugins, name)
5153 classes[name] = namespace[name] = klass
5154 return classes
5155
5156
5157 def traverse_obj(
5158 obj, *path_list, default=None, expected_type=None, get_all=True,
5159 casesense=True, is_user_input=False, traverse_string=False):
5160 ''' Traverse nested list/dict/tuple
5161 @param path_list A list of paths which are checked one by one.
5162 Each path is a list of keys where each key is a:
5163 - None: Do nothing
5164 - string: A dictionary key
5165 - int: An index into a list
5166 - tuple: A list of keys all of which will be traversed
5167 - Ellipsis: Fetch all values in the object
5168 - Function: Takes the key and value as arguments
5169 and returns whether the key matches or not
5170 @param default Default value to return
5171 @param expected_type Only accept final value of this type (Can also be any callable)
5172 @param get_all Return all the values obtained from a path or only the first one
5173 @param casesense Whether to consider dictionary keys as case sensitive
5174 @param is_user_input Whether the keys are generated from user input. If True,
5175 strings are converted to int/slice if necessary
5176 @param traverse_string Whether to traverse inside strings. If True, any
5177 non-compatible object will also be converted into a string
5178 # TODO: Write tests
5179 '''
5180 if not casesense:
5181 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
5182 path_list = (map(_lower, variadic(path)) for path in path_list)
5183
5184 def _traverse_obj(obj, path, _current_depth=0):
5185 nonlocal depth
5186 path = tuple(variadic(path))
5187 for i, key in enumerate(path):
5188 if None in (key, obj):
5189 return obj
5190 if isinstance(key, (list, tuple)):
5191 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5192 key = ...
5193 if key is ...:
5194 obj = (obj.values() if isinstance(obj, dict)
5195 else obj if isinstance(obj, (list, tuple, LazyList))
5196 else str(obj) if traverse_string else [])
5197 _current_depth += 1
5198 depth = max(depth, _current_depth)
5199 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5200 elif callable(key):
5201 if isinstance(obj, (list, tuple, LazyList)):
5202 obj = enumerate(obj)
5203 elif isinstance(obj, dict):
5204 obj = obj.items()
5205 else:
5206 if not traverse_string:
5207 return None
5208 obj = str(obj)
5209 _current_depth += 1
5210 depth = max(depth, _current_depth)
5211 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5212 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5213 obj = (obj.get(key) if casesense or (key in obj)
5214 else next((v for k, v in obj.items() if _lower(k) == key), None))
5215 else:
5216 if is_user_input:
5217 key = (int_or_none(key) if ':' not in key
5218 else slice(*map(int_or_none, key.split(':'))))
5219 if key == slice(None):
5220 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5221 if not isinstance(key, (int, slice)):
5222 return None
5223 if not isinstance(obj, (list, tuple, LazyList)):
5224 if not traverse_string:
5225 return None
5226 obj = str(obj)
5227 try:
5228 obj = obj[key]
5229 except IndexError:
5230 return None
5231 return obj
5232
5233 if isinstance(expected_type, type):
5234 type_test = lambda val: val if isinstance(val, expected_type) else None
5235 else:
5236 type_test = expected_type or IDENTITY
5237
5238 for path in path_list:
5239 depth = 0
5240 val = _traverse_obj(obj, path)
5241 if val is not None:
5242 if depth:
5243 for _ in range(depth - 1):
5244 val = itertools.chain.from_iterable(v for v in val if v is not None)
5245 val = [v for v in map(type_test, val) if v is not None]
5246 if val:
5247 return val if get_all else val[0]
5248 else:
5249 val = type_test(val)
5250 if val is not None:
5251 return val
5252 return default
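# Illustrative usage (editor's sketch; the dict is made up):
#   >>> d = {'formats': [{'height': 720}, {'height': 1080}, {}]}
#   >>> traverse_obj(d, ('formats', ..., 'height'))  # Ellipsis fans out over the list
#   [720, 1080]
#   >>> traverse_obj(d, ('formats', 0, 'height'))
#   720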
5253
5254
5255 def traverse_dict(dictn, keys, casesense=True):
5256 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5257 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5258 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5259
5260
5261 def get_first(obj, keys, **kwargs):
5262 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5263
5264
5265 def variadic(x, allowed_types=(str, bytes, dict)):
5266 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
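# Illustrative usage (editor's sketch): scalars (and str/bytes/dict) are wrapped in a
# tuple, other iterables are passed through:
#   >>> variadic('spam')
#   ('spam',)
#   >>> variadic(['spam', 'eggs'])
#   ['spam', 'eggs']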
5267
5268
5269 def time_seconds(**kwargs):
5270 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5271 return t.timestamp()
5272
5273
5274 # create a JSON Web Signature (jws) with HS256 algorithm
5275 # the resulting format is in JWS Compact Serialization
5276 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5277 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5278 def jwt_encode_hs256(payload_data, key, headers={}):
5279 header_data = {
5280 'alg': 'HS256',
5281 'typ': 'JWT',
5282 }
5283 if headers:
5284 header_data.update(headers)
5285 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5286 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5287 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5288 signature_b64 = base64.b64encode(h.digest())
5289 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5290 return token
5291
5292
5293 # can be extended in the future to verify the signature and parse the header, returning the algorithm used if it's not HS256
5294 def jwt_decode_hs256(jwt):
5295 header_b64, payload_b64, signature_b64 = jwt.split('.')
5296     payload_data = json.loads(base64.urlsafe_b64decode(payload_b64 + '=' * (-len(payload_b64) % 4)))  # re-add base64 padding stripped from JWTs
5297 return payload_data
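# Illustrative usage (editor's sketch; key and payload are made up):
#   >>> token = jwt_encode_hs256({'id': 123}, 'secret')  # b'<header>.<payload>.<signature>'
#   >>> jwt_decode_hs256(token.decode())
#   {'id': 123}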
5298
5299
5300 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5301
5302
5303 @functools.cache
5304 def supports_terminal_sequences(stream):
5305 if compat_os_name == 'nt':
5306 if not WINDOWS_VT_MODE:
5307 return False
5308 elif not os.getenv('TERM'):
5309 return False
5310 try:
5311 return stream.isatty()
5312 except BaseException:
5313 return False
5314
5315
5316 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5317 if get_windows_version() < (10, 0, 10586):
5318 return
5319 global WINDOWS_VT_MODE
5320 try:
5321 Popen.run('', shell=True)
5322 except Exception:
5323 return
5324
5325 WINDOWS_VT_MODE = True
5326 supports_terminal_sequences.cache_clear()
5327
5328
5329 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5330
5331
5332 def remove_terminal_sequences(string):
5333 return _terminal_sequences_re.sub('', string)
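# Illustrative usage (editor's sketch):
#   >>> remove_terminal_sequences('\033[0;31mred\033[0m')
#   'red'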
5334
5335
5336 def number_of_digits(number):
5337 return len('%d' % number)
5338
5339
5340 def join_nonempty(*values, delim='-', from_dict=None):
5341 if from_dict is not None:
5342 values = (traverse_obj(from_dict, variadic(v)) for v in values)
5343 return delim.join(map(str, filter(None, values)))
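# Illustrative usage (editor's sketch): note that all falsy values, including 0 and '', are dropped:
#   >>> join_nonempty('1080p', None, '', 'dash')
#   '1080p-dash'
#   >>> join_nonempty(2022, 6, delim='.')
#   '2022.6'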
5344
5345
5346 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5347 """
5348 Find the largest format dimensions in terms of video width and, for each thumbnail:
5349 * Modify the URL: Match the width with the provided regex and replace with the former width
5350 * Update dimensions
5351
5352 This function is useful with video services that scale the provided thumbnails on demand
5353 """
5354 _keys = ('width', 'height')
5355 max_dimensions = max(
5356 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5357 default=(0, 0))
5358 if not max_dimensions[0]:
5359 return thumbnails
5360 return [
5361 merge_dicts(
5362 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5363 dict(zip(_keys, max_dimensions)), thumbnail)
5364 for thumbnail in thumbnails
5365 ]
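# Illustrative usage (editor's sketch; URLs and sizes are made up):
#   >>> scale_thumbnails_to_max_format_width(
#   ...     [{'width': 1280, 'height': 720}],
#   ...     [{'url': 'https://i.example/thumb-640.jpg'}], r'\d+')
#   [{'url': 'https://i.example/thumb-1280.jpg', 'width': 1280, 'height': 720}]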
5366
5367
5368 def parse_http_range(range):
5369 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5370 if not range:
5371 return None, None, None
5372 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5373 if not crg:
5374 return None, None, None
5375 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
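# Illustrative usage (editor's sketch):
#   >>> parse_http_range('bytes=0-499/1234')
#   (0, 499, 1234)
#   >>> parse_http_range('bytes=500-')
#   (500, None, None)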
5376
5377
5378 def read_stdin(what):
5379 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5380 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5381 return sys.stdin
5382
5383
5384 class Config:
5385 own_args = None
5386 parsed_args = None
5387 filename = None
5388 __initialized = False
5389
5390 def __init__(self, parser, label=None):
5391 self.parser, self.label = parser, label
5392 self._loaded_paths, self.configs = set(), []
5393
5394 def init(self, args=None, filename=None):
5395 assert not self.__initialized
5396 directory = ''
5397 if filename:
5398 location = os.path.realpath(filename)
5399 directory = os.path.dirname(location)
5400 if location in self._loaded_paths:
5401 return False
5402 self._loaded_paths.add(location)
5403
5404 self.own_args, self.__initialized = args, True
5405 opts, _ = self.parser.parse_known_args(args)
5406 self.parsed_args, self.filename = args, filename
5407
5408 for location in opts.config_locations or []:
5409 if location == '-':
5410 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5411 continue
5412 location = os.path.join(directory, expand_path(location))
5413 if os.path.isdir(location):
5414 location = os.path.join(location, 'yt-dlp.conf')
5415 if not os.path.exists(location):
5416 self.parser.error(f'config location {location} does not exist')
5417 self.append_config(self.read_file(location), location)
5418 return True
5419
5420 def __str__(self):
5421 label = join_nonempty(
5422 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5423 delim=' ')
5424 return join_nonempty(
5425 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5426 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5427 delim='\n')
5428
5429 @staticmethod
5430 def read_file(filename, default=[]):
5431 try:
5432 optionf = open(filename)
5433 except OSError:
5434 return default # silently skip if file is not present
5435 try:
5436 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5437 contents = optionf.read()
5438 res = shlex.split(contents, comments=True)
5439 except Exception as err:
5440 raise ValueError(f'Unable to parse "{filename}": {err}')
5441 finally:
5442 optionf.close()
5443 return res
5444
5445 @staticmethod
5446 def hide_login_info(opts):
5447 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5448 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5449
5450 def _scrub_eq(o):
5451 m = eqre.match(o)
5452 if m:
5453 return m.group('key') + '=PRIVATE'
5454 else:
5455 return o
5456
5457 opts = list(map(_scrub_eq, opts))
5458 for idx, opt in enumerate(opts):
5459 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5460 opts[idx + 1] = 'PRIVATE'
5461 return opts
5462
5463 def append_config(self, *args, label=None):
5464 config = type(self)(self.parser, label)
5465 config._loaded_paths = self._loaded_paths
5466 if config.init(*args):
5467 self.configs.append(config)
5468
5469 @property
5470 def all_args(self):
5471 for config in reversed(self.configs):
5472 yield from config.all_args
5473 yield from self.parsed_args or []
5474
5475 def parse_known_args(self, **kwargs):
5476 return self.parser.parse_known_args(self.all_args, **kwargs)
5477
5478 def parse_args(self):
5479 return self.parser.parse_args(self.all_args)
5480
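# Usage sketch for Config (create_parser is hypothetical; yt-dlp supplies its own
# option parser, which must provide parse_known_args/parse_args/error):
#   config = Config(create_parser(), label='Main')
#   config.init(sys.argv[1:])                                   # top-level arguments
#   config.append_config(Config.read_file('yt-dlp.conf'), 'yt-dlp.conf')
#   opts, remaining = config.parse_known_args()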
5481
5482 class WebSocketsWrapper:
5483 """Wraps the websockets module for use in non-async scopes"""
5484 pool = None
5485
5486 def __init__(self, url, headers=None, connect=True):
5487 self.loop = asyncio.new_event_loop()
5488 # XXX: the "loop" argument to websockets.connect() is deprecated
5489 self.conn = websockets.connect(
5490 url, extra_headers=headers, ping_interval=None,
5491 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5492 if connect:
5493 self.__enter__()
5494 atexit.register(self.__exit__, None, None, None)
5495
5496 def __enter__(self):
5497 if not self.pool:
5498 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5499 return self
5500
5501 def send(self, *args):
5502 self.run_with_loop(self.pool.send(*args), self.loop)
5503
5504 def recv(self, *args):
5505 return self.run_with_loop(self.pool.recv(*args), self.loop)
5506
5507 def __exit__(self, type, value, traceback):
5508 try:
5509 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5510 finally:
5511 self._cancel_all_tasks(self.loop)  # must run before the loop is closed
5512 self.loop.close()
5513
5514 # Taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5515 # For contributors: if any new library that uses asyncio needs to run in non-async code, move these functions out of this class
5516 @staticmethod
5517 def run_with_loop(main, loop):
5518 if not asyncio.iscoroutine(main):
5519 raise ValueError(f'a coroutine was expected, got {main!r}')
5520
5521 try:
5522 return loop.run_until_complete(main)
5523 finally:
5524 loop.run_until_complete(loop.shutdown_asyncgens())
5525 if hasattr(loop, 'shutdown_default_executor'):
5526 loop.run_until_complete(loop.shutdown_default_executor())
5527
5528 @staticmethod
5529 def _cancel_all_tasks(loop):
5530 to_cancel = asyncio.all_tasks(loop)
5531
5532 if not to_cancel:
5533 return
5534
5535 for task in to_cancel:
5536 task.cancel()
5537
5538 # XXX: the "loop" argument of asyncio.gather() is removed in Python 3.10+
5539 loop.run_until_complete(
5540 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5541
5542 for task in to_cancel:
5543 if task.cancelled():
5544 continue
5545 if task.exception() is not None:
5546 loop.call_exception_handler({
5547 'message': 'unhandled exception during asyncio.run() shutdown',
5548 'exception': task.exception(),
5549 'task': task,
5550 })
5551
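# Usage sketch for WebSocketsWrapper (the URL and headers are illustrative;
# requires the optional websockets dependency):
#   ws = WebSocketsWrapper('wss://example.invalid/socket', headers={'Origin': 'https://example.invalid'})
#   ws.send('{"op": "subscribe"}')
#   reply = ws.recv()
#   ws.__exit__(None, None, None)  # closes the connection and the event loop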
5552
5553 def merge_headers(*dicts):
5554 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5555 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
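
# Example (illustrative):
#   merge_headers({'user-agent': 'A', 'accept': '*/*'}, {'User-Agent': 'B'})
#   == {'User-Agent': 'B', 'Accept': '*/*'}  # keys are title-cased, later dicts win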
5556
5557
5558 def cached_method(f):
5559 """Cache a method"""
5560 signature = inspect.signature(f)
5561
5562 @functools.wraps(f)
5563 def wrapper(self, *args, **kwargs):
5564 bound_args = signature.bind(self, *args, **kwargs)
5565 bound_args.apply_defaults()
5566 key = tuple(bound_args.arguments.values())
5567
5568 if not hasattr(self, '__cached_method__cache'):
5569 self.__cached_method__cache = {}
5570 cache = self.__cached_method__cache.setdefault(f.__name__, {})
5571 if key not in cache:
5572 cache[key] = f(self, *args, **kwargs)
5573 return cache[key]
5574 return wrapper
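
# Usage sketch (Extractor and expensive_download are illustrative, not part of this module):
#   class Extractor:
#       @cached_method
#       def fetch(self, url):
#           return expensive_download(url)
# Repeated fetch() calls with equal arguments reuse the per-instance cache
# kept under self.__cached_method__cache instead of re-running the method.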
5575
5576
5577 class classproperty:
5578 """property access for class methods"""
5579
5580 def __init__(self, func):
5581 functools.update_wrapper(self, func)
5582 self.func = func
5583
5584 def __get__(self, _, cls):
5585 return self.func(cls)
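
# Usage sketch (Foo is illustrative):
#   class Foo:
#       @classproperty
#       def name(cls):
#           return cls.__name__
#   Foo.name == 'Foo'  # computed on class access, no instance required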
5586
5587
5588 class Namespace(types.SimpleNamespace):
5589 """Immutable namespace"""
5590
5591 def __iter__(self):
5592 return iter(self.__dict__.values())
5593
5594 @property
5595 def items_(self):
5596 return self.__dict__.items()
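
# Usage sketch (illustrative):
#   colors = Namespace(RED='red', GREEN='green')
#   list(colors) == ['red', 'green']                        # iteration yields the values
#   dict(colors.items_) == {'RED': 'red', 'GREEN': 'green'}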
5597
5598
5599 # Deprecated
5600 has_certifi = bool(certifi)
5601 has_websockets = bool(websockets)