import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import importlib.util
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
import zlib

from .compat import asyncio, functools  # isort: split
from .compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
)
from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in urllib.parse.uses_netloc:
            urllib.parse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
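
# Example (illustrative sketch, not from the original source): prefixes in the
# path are expanded via the given namespace mapping, e.g.
#   xpath_with_ns('media:song/url', {'media': 'http://search.yahoo.com/mrss/'})
#   -> '{http://search.yahoo.com/mrss/}song/url'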


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, str):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
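
# Example (illustrative sketch): the class must appear as a whitespace-delimited
# token inside the attribute value, so e.g.
#   get_elements_by_class('foo', '<div class="foo bar">x</div>')  ->  ['x']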


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')
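
# Example (illustrative sketch): nested tags of other names are fine, since the
# parser above only balances the requested tag:
#   get_element_text_and_html_by_tag('b', '<a><b>text</b></a>')
#   -> ('text', '<b>text</b>')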


class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        html.parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs
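
# A quick usage sketch (illustrative, not from the original source):
#   extract_attributes('<a href="page.html" data-id=1 hidden>')
#   -> {'href': 'page.html', 'data-id': '1', 'hidden': None}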


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dicts with their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
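
# Example of the transformations above (illustrative sketch):
#   clean_html('<p>foo<br/>bar</p> <p>&amp; baz</p>')  ->  'foo\nbar\n& baz'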


class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        if self.ignore_extra:
            return self.raw_decode(s.lstrip())[0]
        return super().decode(s)
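
# Usage sketch (illustrative): with ignore_extra=True, trailing junk after the
# first JSON value is discarded instead of raising, e.g.
#   LenientJSONDecoder(ignore_extra=True).decode('{"a": 1} trailing garbage')
#   -> {'a': 1}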


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
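
# Example (illustrative sketch): an RFC 2822 date maps to a UNIX timestamp, e.g.
#   timeconvert('Wed, 14 Feb 2024 00:00:00 +0000')  ->  1707868800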


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
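
# Illustrative examples (sketch, not from the original source):
#   sanitize_filename('abc/de')                ->  'abc_de'
#   sanitize_filename('ä', restricted=True)    ->  'a'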


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
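
# Illustrative examples (sketch):
#   sanitize_url('//example.com/video')    ->  'http://example.com/video'
#   sanitize_url('httpss://example.com')   ->  'https://example.com'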


def extract_basic_auth(url):
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = urllib.parse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
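
# Example (illustrative sketch): credentials embedded in the URL are moved into
# a Basic auth header, e.g.
#   extract_basic_auth('http://user:pass@example.com/')
#   -> ('http://example.com/', 'Basic dXNlcjpwYXNz')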


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return urllib.request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _iter():
        seen = []  # Do not use set since the items can be unhashable
        for x in iterable:
            if x not in seen:
                seen.append(x)
                yield x

    return _iter() if lazy else list(_iter())
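
# e.g. orderedSet([1, 2, 1, 3, 2]) -> [1, 2, 3] (illustrative)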


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
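
# Illustrative examples (sketch):
#   unescapeHTML('&amp;')   ->  '&'
#   unescapeHTML('&#x27;')  ->  "'"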


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, text=False, **kwargs):
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, **kwargs):
        with cls(*args, **kwargs) as proc:
            stdout, stderr = proc.communicate_or_kill()
            return stdout or '', stderr or '', proc.returncode
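
# Usage sketch (illustrative; 'ffmpeg' is just a placeholder command):
#   stdout, stderr, returncode = Popen.run(
#       ['ffmpeg', '-version'], text=True,
#       stdout=subprocess.PIPE, stderr=subprocess.PIPE)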


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)
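
# e.g. timetuple_from_msec(123456789)
#      -> Time(hours=34, minutes=17, seconds=36, milliseconds=789) (illustrative)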


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
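
# e.g. formatSeconds(3661, msec=True) -> '1:01:01.000' (illustrative)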


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    from .update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info

        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers

class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = http.client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = urllib.request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, http.client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or http.client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return urllib.request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = urllib.request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}

        # A 303 must either use GET or HEAD for subsequent request
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
        if code == 303 and m != 'HEAD':
            m = 'GET'
        # 301 and 302 redirects are commonly turned into a GET from a POST
        # for subsequent requests by browsers, so we'll do the same.
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
        if code in (301, 302) and m == 'POST':
            m = 'GET'

        return urllib.request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True, method=m)


def extract_timezone(date_str):
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
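
# Example (illustrative sketch): the offset is split off the date string, e.g.
#   extract_timezone('2023-01-01T12:00:00+05:30')
#   -> (datetime.timedelta(seconds=19800), '2023-01-01T12:00:00')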
1695
1696
1697 def parse_iso8601(date_str, delimiter='T', timezone=None):
1698 """ Return a UNIX timestamp from the given date """
1699
1700 if date_str is None:
1701 return None
1702
1703 date_str = re.sub(r'\.[0-9]+', '', date_str)
1704
1705 if timezone is None:
1706 timezone, date_str = extract_timezone(date_str)
1707
1708 with contextlib.suppress(ValueError):
1709 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1710 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1711 return calendar.timegm(dt.timetuple())
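# Illustrative sketch (example values are assumptions):
#   parse_iso8601('2014-12-17T08:35:59.000Z')  # -> 1418805359
#   parse_iso8601('2014-03-23T23:04:26+0100')  # -> 1395612266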
1712
1713
1714 def date_formats(day_first=True):
1715 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1716
1717
1718 def unified_strdate(date_str, day_first=True):
1719 """Return a string with the date in the format YYYYMMDD"""
1720
1721 if date_str is None:
1722 return None
1723 upload_date = None
1724 # Replace commas
1725 date_str = date_str.replace(',', ' ')
1726 # Remove AM/PM + timezone
1727 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1728 _, date_str = extract_timezone(date_str)
1729
1730 for expression in date_formats(day_first):
1731 with contextlib.suppress(ValueError):
1732 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1733 if upload_date is None:
1734 timetuple = email.utils.parsedate_tz(date_str)
1735 if timetuple:
1736 with contextlib.suppress(ValueError):
1737 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1738 if upload_date is not None:
1739 return str(upload_date)
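# Illustrative sketch (example values are assumptions; accepted patterns are
# driven by DATE_FORMATS*):
#   unified_strdate('December 21, 2010')  # -> '20101221'
#   unified_strdate('1968-12-10')         # -> '19681210'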
1740
1741
1742 def unified_timestamp(date_str, day_first=True):
1743 if date_str is None:
1744 return None
1745
1746 date_str = re.sub(r'[,|]', '', date_str)
1747
1748 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1749 timezone, date_str = extract_timezone(date_str)
1750
1751 # Remove AM/PM + timezone
1752 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1753
1754 # Remove unrecognized timezones from ISO 8601 alike timestamps
1755 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1756 if m:
1757 date_str = date_str[:-len(m.group('tz'))]
1758
1759 # Python only supports microseconds, so remove nanoseconds
1760 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1761 if m:
1762 date_str = m.group(1)
1763
1764 for expression in date_formats(day_first):
1765 with contextlib.suppress(ValueError):
1766 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1767 return calendar.timegm(dt.timetuple())
1768 timetuple = email.utils.parsedate_tz(date_str)
1769 if timetuple:
1770 return calendar.timegm(timetuple) + pm_delta * 3600
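# Illustrative sketch (example values are assumptions):
#   unified_timestamp('December 21, 2010')  # -> 1292889600
#   unified_timestamp('8/7/2009')           # -> 1247011200 (day_first: 8 July)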
1771
1772
1773 def determine_ext(url, default_ext='unknown_video'):
1774 if url is None or '.' not in url:
1775 return default_ext
1776 guess = url.partition('?')[0].rpartition('.')[2]
1777 if re.match(r'^[A-Za-z0-9]+$', guess):
1778 return guess
1779 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
1780 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1781 return guess.rstrip('/')
1782 else:
1783 return default_ext
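# Illustrative sketch (example URLs are assumptions):
#   determine_ext('http://example.com/video.mp4?dl=1')         # -> 'mp4'
#   determine_ext('http://example.com/foo/bar.mp4/?download')  # -> 'mp4'
#   determine_ext('http://example.com/watch')                  # -> 'unknown_video'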
1784
1785
1786 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1787 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1788
1789
1790 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1791 R"""
1792 Return a datetime object from a string.
1793 Supported format:
1794 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1795
1796 @param format strftime format of DATE
1797 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1798 auto: round to the unit provided in date_str (if applicable).
1799 """
1800 auto_precision = False
1801 if precision == 'auto':
1802 auto_precision = True
1803 precision = 'microsecond'
1804 today = datetime_round(datetime.datetime.utcnow(), precision)
1805 if date_str in ('now', 'today'):
1806 return today
1807 if date_str == 'yesterday':
1808 return today - datetime.timedelta(days=1)
1809 match = re.match(
1810 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1811 date_str)
1812 if match is not None:
1813 start_time = datetime_from_str(match.group('start'), precision, format)
1814 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1815 unit = match.group('unit')
1816 if unit == 'month' or unit == 'year':
1817 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1818 unit = 'day'
1819 else:
1820 if unit == 'week':
1821 unit = 'day'
1822 time *= 7
1823 delta = datetime.timedelta(**{unit + 's': time})
1824 new_date = start_time + delta
1825 if auto_precision:
1826 return datetime_round(new_date, unit)
1827 return new_date
1828
1829 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1830
1831
1832 def date_from_str(date_str, format='%Y%m%d', strict=False):
1833 R"""
1834 Return a date object from a string using datetime_from_str
1835
1836 @param strict Restrict allowed patterns to "YYYYMMDD" and
1837 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1838 """
1839 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1840 raise ValueError(f'Invalid date format "{date_str}"')
1841 return datetime_from_str(date_str, precision='microsecond', format=format).date()
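# Illustrative sketch (example values are assumptions):
#   date_from_str('now-1week')  # -> the date 7 days before today (UTC)
#   date_from_str('20200229')   # -> datetime.date(2020, 2, 29)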
1842
1843
1844 def datetime_add_months(dt, months):
1845 """Increment/Decrement a datetime object by months."""
1846 month = dt.month + months - 1
1847 year = dt.year + month // 12
1848 month = month % 12 + 1
1849 day = min(dt.day, calendar.monthrange(year, month)[1])
1850 return dt.replace(year, month, day)
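# Illustrative sketch (example value is an assumption): out-of-range days are
# clamped to the last day of the target month, e.g.
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   # -> datetime.datetime(2020, 2, 29, 0, 0)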
1851
1852
1853 def datetime_round(dt, precision='day'):
1854 """
1855 Round a datetime object's time to a specific precision
1856 """
1857 if precision == 'microsecond':
1858 return dt
1859
1860 unit_seconds = {
1861 'day': 86400,
1862 'hour': 3600,
1863 'minute': 60,
1864 'second': 1,
1865 }
1866 roundto = lambda x, n: ((x + n / 2) // n) * n
1867 timestamp = calendar.timegm(dt.timetuple())
1868 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1869
1870
1871 def hyphenate_date(date_str):
1872 """
1873 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1874 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1875 if match is not None:
1876 return '-'.join(match.groups())
1877 else:
1878 return date_str
1879
1880
1881 class DateRange:
1882 """Represents a time interval between two dates"""
1883
1884 def __init__(self, start=None, end=None):
1885 """start and end must be strings in the format accepted by date"""
1886 if start is not None:
1887 self.start = date_from_str(start, strict=True)
1888 else:
1889 self.start = datetime.datetime.min.date()
1890 if end is not None:
1891 self.end = date_from_str(end, strict=True)
1892 else:
1893 self.end = datetime.datetime.max.date()
1894 if self.start > self.end:
1895 raise ValueError('Date range: "%s"; the start date must be before the end date' % self)
1896
1897 @classmethod
1898 def day(cls, day):
1899 """Returns a range that only contains the given day"""
1900 return cls(day, day)
1901
1902 def __contains__(self, date):
1903 """Check if the date is in the range"""
1904 if not isinstance(date, datetime.date):
1905 date = date_from_str(date)
1906 return self.start <= date <= self.end
1907
1908 def __str__(self):
1909 return f'{self.start.isoformat()} - {self.end.isoformat()}'
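# Illustrative sketch (example dates are assumptions):
#   '20200103' in DateRange('20200101', '20200107')  # -> True
#   '20200110' in DateRange.day('20200103')          # -> False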
1910
1911
1912 def platform_name():
1913 """ Returns the platform name as a str """
1914 write_string('DeprecationWarning: yt_dlp.utils.platform_name is deprecated, use platform.platform instead')
1915 return platform.platform()
1916
1917
1918 @functools.cache
1919 def system_identifier():
1920 python_implementation = platform.python_implementation()
1921 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
1922 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
1923
1924 return 'Python %s (%s %s) - %s %s' % (
1925 platform.python_version(),
1926 python_implementation,
1927 platform.architecture()[0],
1928 platform.platform(),
1929 format_field(join_nonempty(*platform.libc_ver(), delim=' '), None, '(%s)'),
1930 )
1931
1932
1933 @functools.cache
1934 def get_windows_version():
1935 ''' Get Windows version. Returns () if not running on Windows '''
1936 if compat_os_name == 'nt':
1937 return version_tuple(platform.win32_ver()[1])
1938 else:
1939 return ()
1940
1941
1942 def write_string(s, out=None, encoding=None):
1943 assert isinstance(s, str)
1944 out = out or sys.stderr
1945
1946 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1947 s = re.sub(r'([\r\n]+)', r' \1', s)
1948
1949 enc, buffer = None, out
1950 if 'b' in getattr(out, 'mode', ''):
1951 enc = encoding or preferredencoding()
1952 elif hasattr(out, 'buffer'):
1953 buffer = out.buffer
1954 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1955
1956 buffer.write(s.encode(enc, 'ignore') if enc else s)
1957 out.flush()
1958
1959
1960 def bytes_to_intlist(bs):
1961 if not bs:
1962 return []
1963 if isinstance(bs[0], int): # bytes/bytearray - indexing yields ints
1964 return list(bs)
1965 else:
1966 return [ord(c) for c in bs]
1967
1968
1969 def intlist_to_bytes(xs):
1970 if not xs:
1971 return b''
1972 return struct.pack('%dB' % len(xs), *xs)
1973
1974
1975 class LockingUnsupportedError(OSError):
1976 msg = 'File locking is not supported'
1977
1978 def __init__(self):
1979 super().__init__(self.msg)
1980
1981
1982 # Cross-platform file locking
1983 if sys.platform == 'win32':
1984 import ctypes.wintypes
1985 import msvcrt
1986
1987 class OVERLAPPED(ctypes.Structure):
1988 _fields_ = [
1989 ('Internal', ctypes.wintypes.LPVOID),
1990 ('InternalHigh', ctypes.wintypes.LPVOID),
1991 ('Offset', ctypes.wintypes.DWORD),
1992 ('OffsetHigh', ctypes.wintypes.DWORD),
1993 ('hEvent', ctypes.wintypes.HANDLE),
1994 ]
1995
1996 kernel32 = ctypes.windll.kernel32
1997 LockFileEx = kernel32.LockFileEx
1998 LockFileEx.argtypes = [
1999 ctypes.wintypes.HANDLE, # hFile
2000 ctypes.wintypes.DWORD, # dwFlags
2001 ctypes.wintypes.DWORD, # dwReserved
2002 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2003 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2004 ctypes.POINTER(OVERLAPPED) # Overlapped
2005 ]
2006 LockFileEx.restype = ctypes.wintypes.BOOL
2007 UnlockFileEx = kernel32.UnlockFileEx
2008 UnlockFileEx.argtypes = [
2009 ctypes.wintypes.HANDLE, # hFile
2010 ctypes.wintypes.DWORD, # dwReserved
2011 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2012 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2013 ctypes.POINTER(OVERLAPPED) # Overlapped
2014 ]
2015 UnlockFileEx.restype = ctypes.wintypes.BOOL
2016 whole_low = 0xffffffff
2017 whole_high = 0x7fffffff
2018
2019 def _lock_file(f, exclusive, block):
2020 overlapped = OVERLAPPED()
2021 overlapped.Offset = 0
2022 overlapped.OffsetHigh = 0
2023 overlapped.hEvent = 0
2024 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2025
2026 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2027 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2028 0, whole_low, whole_high, f._lock_file_overlapped_p):
2029 # NB: The no-argument form of "ctypes.FormatError" does not work on PyPy
2030 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2031
2032 def _unlock_file(f):
2033 assert f._lock_file_overlapped_p
2034 handle = msvcrt.get_osfhandle(f.fileno())
2035 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2036 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2037
2038 else:
2039 try:
2040 import fcntl
2041
2042 def _lock_file(f, exclusive, block):
2043 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2044 if not block:
2045 flags |= fcntl.LOCK_NB
2046 try:
2047 fcntl.flock(f, flags)
2048 except BlockingIOError:
2049 raise
2050 except OSError: # AOSP does not have flock()
2051 fcntl.lockf(f, flags)
2052
2053 def _unlock_file(f):
2054 try:
2055 fcntl.flock(f, fcntl.LOCK_UN)
2056 except OSError:
2057 fcntl.lockf(f, fcntl.LOCK_UN)
2058
2059 except ImportError:
2060
2061 def _lock_file(f, exclusive, block):
2062 raise LockingUnsupportedError()
2063
2064 def _unlock_file(f):
2065 raise LockingUnsupportedError()
2066
2067
2068 class locked_file:
2069 locked = False
2070
2071 def __init__(self, filename, mode, block=True, encoding=None):
2072 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2073 raise NotImplementedError(mode)
2074 self.mode, self.block = mode, block
2075
2076 writable = any(f in mode for f in 'wax+')
2077 readable = any(f in mode for f in 'r+')
2078 flags = functools.reduce(operator.ior, (
2079 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2080 getattr(os, 'O_BINARY', 0), # Windows only
2081 getattr(os, 'O_NOINHERIT', 0), # Windows only
2082 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2083 os.O_APPEND if 'a' in mode else 0,
2084 os.O_EXCL if 'x' in mode else 0,
2085 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2086 ))
2087
2088 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2089
2090 def __enter__(self):
2091 exclusive = 'r' not in self.mode
2092 try:
2093 _lock_file(self.f, exclusive, self.block)
2094 self.locked = True
2095 except OSError:
2096 self.f.close()
2097 raise
2098 if 'w' in self.mode:
2099 try:
2100 self.f.truncate()
2101 except OSError as e:
2102 if e.errno not in (
2103 errno.ESPIPE, # Illegal seek - expected for FIFO
2104 errno.EINVAL, # Invalid argument - expected for /dev/null
2105 ):
2106 raise
2107 return self
2108
2109 def unlock(self):
2110 if not self.locked:
2111 return
2112 try:
2113 _unlock_file(self.f)
2114 finally:
2115 self.locked = False
2116
2117 def __exit__(self, *_):
2118 try:
2119 self.unlock()
2120 finally:
2121 self.f.close()
2122
2123 open = __enter__
2124 close = __exit__
2125
2126 def __getattr__(self, attr):
2127 return getattr(self.f, attr)
2128
2129 def __iter__(self):
2130 return iter(self.f)
2131
2132
2133 @functools.cache
2134 def get_filesystem_encoding():
2135 encoding = sys.getfilesystemencoding()
2136 return encoding if encoding is not None else 'utf-8'
2137
2138
2139 def shell_quote(args):
2140 quoted_args = []
2141 encoding = get_filesystem_encoding()
2142 for a in args:
2143 if isinstance(a, bytes):
2144 # We may get a filename encoded with 'encodeFilename'
2145 a = a.decode(encoding)
2146 quoted_args.append(compat_shlex_quote(a))
2147 return ' '.join(quoted_args)
2148
2149
2150 def smuggle_url(url, data):
2151 """ Pass additional data in a URL for internal use. """
2152
2153 url, idata = unsmuggle_url(url, {})
2154 data.update(idata)
2155 sdata = urllib.parse.urlencode(
2156 {'__youtubedl_smuggle': json.dumps(data)})
2157 return url + '#' + sdata
2158
2159
2160 def unsmuggle_url(smug_url, default=None):
2161 if '#__youtubedl_smuggle' not in smug_url:
2162 return smug_url, default
2163 url, _, sdata = smug_url.rpartition('#')
2164 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2165 data = json.loads(jsond)
2166 return url, data
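# Illustrative round-trip sketch (example values are assumptions):
#   url = smuggle_url('https://example.com/v', {'referer': 'https://example.com/'})
#   unsmuggle_url(url)
#   # -> ('https://example.com/v', {'referer': 'https://example.com/'})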
2167
2168
2169 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2170 """ Formats numbers with decimal sufixes like K, M, etc """
2171 num, factor = float_or_none(num), float(factor)
2172 if num is None or num < 0:
2173 return None
2174 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2175 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2176 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2177 if factor == 1024:
2178 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2179 converted = num / (factor ** exponent)
2180 return fmt % (converted, suffix)
2181
2182
2183 def format_bytes(bytes):
2184 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
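# Illustrative sketch (example values are assumptions):
#   format_decimal_suffix(1_300_000, '%.1f%s')  # -> '1.3M'
#   format_bytes(1024 ** 2)                     # -> '1.00MiB'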
2185
2186
2187 def lookup_unit_table(unit_table, s):
2188 units_re = '|'.join(re.escape(u) for u in unit_table)
2189 m = re.match(
2190 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2191 if not m:
2192 return None
2193 num_str = m.group('num').replace(',', '.')
2194 mult = unit_table[m.group('unit')]
2195 return int(float(num_str) * mult)
2196
2197
2198 def parse_filesize(s):
2199 if s is None:
2200 return None
2201
2202 # The lower-case forms are of course incorrect and unofficial,
2203 # but we support those too
2204 _UNIT_TABLE = {
2205 'B': 1,
2206 'b': 1,
2207 'bytes': 1,
2208 'KiB': 1024,
2209 'KB': 1000,
2210 'kB': 1024,
2211 'Kb': 1000,
2212 'kb': 1000,
2213 'kilobytes': 1000,
2214 'kibibytes': 1024,
2215 'MiB': 1024 ** 2,
2216 'MB': 1000 ** 2,
2217 'mB': 1024 ** 2,
2218 'Mb': 1000 ** 2,
2219 'mb': 1000 ** 2,
2220 'megabytes': 1000 ** 2,
2221 'mebibytes': 1024 ** 2,
2222 'GiB': 1024 ** 3,
2223 'GB': 1000 ** 3,
2224 'gB': 1024 ** 3,
2225 'Gb': 1000 ** 3,
2226 'gb': 1000 ** 3,
2227 'gigabytes': 1000 ** 3,
2228 'gibibytes': 1024 ** 3,
2229 'TiB': 1024 ** 4,
2230 'TB': 1000 ** 4,
2231 'tB': 1024 ** 4,
2232 'Tb': 1000 ** 4,
2233 'tb': 1000 ** 4,
2234 'terabytes': 1000 ** 4,
2235 'tebibytes': 1024 ** 4,
2236 'PiB': 1024 ** 5,
2237 'PB': 1000 ** 5,
2238 'pB': 1024 ** 5,
2239 'Pb': 1000 ** 5,
2240 'pb': 1000 ** 5,
2241 'petabytes': 1000 ** 5,
2242 'pebibytes': 1024 ** 5,
2243 'EiB': 1024 ** 6,
2244 'EB': 1000 ** 6,
2245 'eB': 1024 ** 6,
2246 'Eb': 1000 ** 6,
2247 'eb': 1000 ** 6,
2248 'exabytes': 1000 ** 6,
2249 'exbibytes': 1024 ** 6,
2250 'ZiB': 1024 ** 7,
2251 'ZB': 1000 ** 7,
2252 'zB': 1024 ** 7,
2253 'Zb': 1000 ** 7,
2254 'zb': 1000 ** 7,
2255 'zettabytes': 1000 ** 7,
2256 'zebibytes': 1024 ** 7,
2257 'YiB': 1024 ** 8,
2258 'YB': 1000 ** 8,
2259 'yB': 1024 ** 8,
2260 'Yb': 1000 ** 8,
2261 'yb': 1000 ** 8,
2262 'yottabytes': 1000 ** 8,
2263 'yobibytes': 1024 ** 8,
2264 }
2265
2266 return lookup_unit_table(_UNIT_TABLE, s)
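# Illustrative sketch (example values are assumptions):
#   parse_filesize('5 GB')    # -> 5000000000
#   parse_filesize('1.2MiB')  # -> 1258291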
2267
2268
2269 def parse_count(s):
2270 if s is None:
2271 return None
2272
2273 s = re.sub(r'^[^\d]+\s', '', s).strip()
2274
2275 if re.match(r'^[\d,.]+$', s):
2276 return str_to_int(s)
2277
2278 _UNIT_TABLE = {
2279 'k': 1000,
2280 'K': 1000,
2281 'm': 1000 ** 2,
2282 'M': 1000 ** 2,
2283 'kk': 1000 ** 2,
2284 'KK': 1000 ** 2,
2285 'b': 1000 ** 3,
2286 'B': 1000 ** 3,
2287 }
2288
2289 ret = lookup_unit_table(_UNIT_TABLE, s)
2290 if ret is not None:
2291 return ret
2292
2293 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2294 if mobj:
2295 return str_to_int(mobj.group(1))
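# Illustrative sketch (example values are assumptions):
#   parse_count('1.5M')         # -> 1500000
#   parse_count('1,234 views')  # -> 1234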
2296
2297
2298 def parse_resolution(s, *, lenient=False):
2299 if s is None:
2300 return {}
2301
2302 if lenient:
2303 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2304 else:
2305 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2306 if mobj:
2307 return {
2308 'width': int(mobj.group('w')),
2309 'height': int(mobj.group('h')),
2310 }
2311
2312 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2313 if mobj:
2314 return {'height': int(mobj.group(1))}
2315
2316 mobj = re.search(r'\b([48])[kK]\b', s)
2317 if mobj:
2318 return {'height': int(mobj.group(1)) * 540}
2319
2320 return {}
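# Illustrative sketch (example values are assumptions):
#   parse_resolution('1920x1080')  # -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       # -> {'height': 720}
#   parse_resolution('4K')         # -> {'height': 2160}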
2321
2322
2323 def parse_bitrate(s):
2324 if not isinstance(s, str):
2325 return
2326 mobj = re.search(r'\b(\d+)\s*kbps', s)
2327 if mobj:
2328 return int(mobj.group(1))
2329
2330
2331 def month_by_name(name, lang='en'):
2332 """ Return the number of a month by (locale-independently) English name """
2333
2334 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2335
2336 try:
2337 return month_names.index(name) + 1
2338 except ValueError:
2339 return None
2340
2341
2342 def month_by_abbreviation(abbrev):
2343 """ Return the number of a month by (locale-independently) English
2344 abbreviations """
2345
2346 try:
2347 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2348 except ValueError:
2349 return None
2350
2351
2352 def fix_xml_ampersands(xml_str):
2353 """Replace all the '&' by '&amp;' in XML"""
2354 return re.sub(
2355 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2356 '&amp;',
2357 xml_str)
2358
2359
2360 def setproctitle(title):
2361 assert isinstance(title, str)
2362
2363 # ctypes in Jython is not complete
2364 # http://bugs.jython.org/issue2148
2365 if sys.platform.startswith('java'):
2366 return
2367
2368 try:
2369 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2370 except OSError:
2371 return
2372 except TypeError:
2373 # LoadLibrary in Windows Python 2.7.13 only expects
2374 # a bytestring, but since unicode_literals turns
2375 # every string into a unicode string, it fails.
2376 return
2377 title_bytes = title.encode()
2378 buf = ctypes.create_string_buffer(len(title_bytes))
2379 buf.value = title_bytes
2380 try:
2381 libc.prctl(15, buf, 0, 0, 0)
2382 except AttributeError:
2383 return # Strange libc, just skip this
2384
2385
2386 def remove_start(s, start):
2387 return s[len(start):] if s is not None and s.startswith(start) else s
2388
2389
2390 def remove_end(s, end):
2391 return s[:-len(end)] if s is not None and s.endswith(end) else s
2392
2393
2394 def remove_quotes(s):
2395 if s is None or len(s) < 2:
2396 return s
2397 for quote in ('"', "'", ):
2398 if s[0] == quote and s[-1] == quote:
2399 return s[1:-1]
2400 return s
2401
2402
2403 def get_domain(url):
2404 return '.'.join(urllib.parse.urlparse(url).netloc.rsplit('.', 2)[-2:])
2405
2406
2407 def url_basename(url):
2408 path = urllib.parse.urlparse(url).path
2409 return path.strip('/').split('/')[-1]
2410
2411
2412 def base_url(url):
2413 return re.match(r'https?://[^?#&]+/', url).group()
2414
2415
2416 def urljoin(base, path):
2417 if isinstance(path, bytes):
2418 path = path.decode()
2419 if not isinstance(path, str) or not path:
2420 return None
2421 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2422 return path
2423 if isinstance(base, bytes):
2424 base = base.decode()
2425 if not isinstance(base, str) or not re.match(
2426 r'^(?:https?:)?//', base):
2427 return None
2428 return urllib.parse.urljoin(base, path)
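# Illustrative sketch (example URLs are assumptions):
#   urljoin('https://example.com/a/', 'b.mp4')  # -> 'https://example.com/a/b.mp4'
#   urljoin('https://example.com/a/', '//cdn.example.com/b.mp4')
#   # -> '//cdn.example.com/b.mp4' (already absolute, returned as-is)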
2429
2430
2431 class HEADRequest(urllib.request.Request):
2432 def get_method(self):
2433 return 'HEAD'
2434
2435
2436 class PUTRequest(urllib.request.Request):
2437 def get_method(self):
2438 return 'PUT'
2439
2440
2441 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2442 if get_attr and v is not None:
2443 v = getattr(v, get_attr, None)
2444 try:
2445 return int(v) * invscale // scale
2446 except (ValueError, TypeError, OverflowError):
2447 return default
2448
2449
2450 def str_or_none(v, default=None):
2451 return default if v is None else str(v)
2452
2453
2454 def str_to_int(int_str):
2455 """ A more relaxed version of int_or_none """
2456 if isinstance(int_str, int):
2457 return int_str
2458 elif isinstance(int_str, str):
2459 int_str = re.sub(r'[,\.\+]', '', int_str)
2460 return int_or_none(int_str)
2461
2462
2463 def float_or_none(v, scale=1, invscale=1, default=None):
2464 if v is None:
2465 return default
2466 try:
2467 return float(v) * invscale / scale
2468 except (ValueError, TypeError):
2469 return default
2470
2471
2472 def bool_or_none(v, default=None):
2473 return v if isinstance(v, bool) else default
2474
2475
2476 def strip_or_none(v, default=None):
2477 return v.strip() if isinstance(v, str) else default
2478
2479
2480 def url_or_none(url):
2481 if not url or not isinstance(url, str):
2482 return None
2483 url = url.strip()
2484 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2485
2486
2487 def request_to_url(req):
2488 if isinstance(req, urllib.request.Request):
2489 return req.get_full_url()
2490 else:
2491 return req
2492
2493
2494 def strftime_or_none(timestamp, date_format, default=None):
2495 datetime_object = None
2496 try:
2497 if isinstance(timestamp, (int, float)): # unix timestamp
2498 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2499 elif isinstance(timestamp, str): # assume YYYYMMDD
2500 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2501 return datetime_object.strftime(date_format)
2502 except (ValueError, TypeError, AttributeError):
2503 return default
2504
2505
2506 def parse_duration(s):
2507 if not isinstance(s, str):
2508 return None
2509 s = s.strip()
2510 if not s:
2511 return None
2512
2513 days, hours, mins, secs, ms = [None] * 5
2514 m = re.match(r'''(?x)
2515 (?P<before_secs>
2516 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2517 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2518 (?P<ms>[.:][0-9]+)?Z?$
2519 ''', s)
2520 if m:
2521 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2522 else:
2523 m = re.match(
2524 r'''(?ix)(?:P?
2525 (?:
2526 [0-9]+\s*y(?:ears?)?,?\s*
2527 )?
2528 (?:
2529 [0-9]+\s*m(?:onths?)?,?\s*
2530 )?
2531 (?:
2532 [0-9]+\s*w(?:eeks?)?,?\s*
2533 )?
2534 (?:
2535 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2536 )?
2537 T)?
2538 (?:
2539 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2540 )?
2541 (?:
2542 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2543 )?
2544 (?:
2545 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2546 )?Z?$''', s)
2547 if m:
2548 days, hours, mins, secs, ms = m.groups()
2549 else:
2550 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2551 if m:
2552 hours, mins = m.groups()
2553 else:
2554 return None
2555
2556 if ms:
2557 ms = ms.replace(':', '.')
2558 return sum(float(part or 0) * mult for part, mult in (
2559 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
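# Illustrative sketch (example values are assumptions; seconds are returned
# as a float):
#   parse_duration('1:23:45')  # -> 5025.0
#   parse_duration('PT1H30M')  # -> 5400.0
#   parse_duration('3 min')    # -> 180.0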
2560
2561
2562 def prepend_extension(filename, ext, expected_real_ext=None):
2563 name, real_ext = os.path.splitext(filename)
2564 return (
2565 f'{name}.{ext}{real_ext}'
2566 if not expected_real_ext or real_ext[1:] == expected_real_ext
2567 else f'{filename}.{ext}')
2568
2569
2570 def replace_extension(filename, ext, expected_real_ext=None):
2571 name, real_ext = os.path.splitext(filename)
2572 return '{}.{}'.format(
2573 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2574 ext)
2575
2576
2577 def check_executable(exe, args=[]):
2578 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2579 args can be a list of arguments for a short output (like -version) """
2580 try:
2581 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2582 except OSError:
2583 return False
2584 return exe
2585
2586
2587 def _get_exe_version_output(exe, args, *, to_screen=None):
2588 if to_screen:
2589 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2590 try:
2591 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2592 # SIGTTOU if yt-dlp is run in the background.
2593 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2594 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2595 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2596 except OSError:
2597 return False
2598 return stdout
2599
2600
2601 def detect_exe_version(output, version_re=None, unrecognized='present'):
2602 assert isinstance(output, str)
2603 if version_re is None:
2604 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2605 m = re.search(version_re, output)
2606 if m:
2607 return m.group(1)
2608 else:
2609 return unrecognized
2610
2611
2612 def get_exe_version(exe, args=['--version'],
2613 version_re=None, unrecognized='present'):
2614 """ Returns the version of the specified executable,
2615 or False if the executable is not present """
2616 out = _get_exe_version_output(exe, args)
2617 return detect_exe_version(out, version_re, unrecognized) if out else False
2618
2619
2620 def frange(start=0, stop=None, step=1):
2621 """Float range"""
2622 if stop is None:
2623 start, stop = 0, start
2624 sign = [-1, 1][step > 0] if step else 0
2625 while sign * start < sign * stop:
2626 yield start
2627 start += step
2628
2629
2630 class LazyList(collections.abc.Sequence):
2631 """Lazy immutable list from an iterable
2632 Note that slices of a LazyList are lists and not LazyLists"""
2633
2634 class IndexError(IndexError):
2635 pass
2636
2637 def __init__(self, iterable, *, reverse=False, _cache=None):
2638 self._iterable = iter(iterable)
2639 self._cache = [] if _cache is None else _cache
2640 self._reversed = reverse
2641
2642 def __iter__(self):
2643 if self._reversed:
2644 # We need to consume the entire iterable to iterate in reverse
2645 yield from self.exhaust()
2646 return
2647 yield from self._cache
2648 for item in self._iterable:
2649 self._cache.append(item)
2650 yield item
2651
2652 def _exhaust(self):
2653 self._cache.extend(self._iterable)
2654 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2655 return self._cache
2656
2657 def exhaust(self):
2658 """Evaluate the entire iterable"""
2659 return self._exhaust()[::-1 if self._reversed else 1]
2660
2661 @staticmethod
2662 def _reverse_index(x):
2663 return None if x is None else -(x + 1)
2664
2665 def __getitem__(self, idx):
2666 if isinstance(idx, slice):
2667 if self._reversed:
2668 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2669 start, stop, step = idx.start, idx.stop, idx.step or 1
2670 elif isinstance(idx, int):
2671 if self._reversed:
2672 idx = self._reverse_index(idx)
2673 start, stop, step = idx, idx, 0
2674 else:
2675 raise TypeError('indices must be integers or slices')
2676 if ((start or 0) < 0 or (stop or 0) < 0
2677 or (start is None and step < 0)
2678 or (stop is None and step > 0)):
2679 # We need to consume the entire iterable to be able to slice from the end
2680 # Obviously, never use this with infinite iterables
2681 self._exhaust()
2682 try:
2683 return self._cache[idx]
2684 except IndexError as e:
2685 raise self.IndexError(e) from e
2686 n = max(start or 0, stop or 0) - len(self._cache) + 1
2687 if n > 0:
2688 self._cache.extend(itertools.islice(self._iterable, n))
2689 try:
2690 return self._cache[idx]
2691 except IndexError as e:
2692 raise self.IndexError(e) from e
2693
2694 def __bool__(self):
2695 try:
2696 self[-1] if self._reversed else self[0]
2697 except self.IndexError:
2698 return False
2699 return True
2700
2701 def __len__(self):
2702 self._exhaust()
2703 return len(self._cache)
2704
2705 def __reversed__(self):
2706 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2707
2708 def __copy__(self):
2709 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2710
2711 def __repr__(self):
2712 # repr and str should mimic a list. So we exhaust the iterable
2713 return repr(self.exhaust())
2714
2715 def __str__(self):
2716 return repr(self.exhaust())
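# Illustrative sketch (example is an assumption): slicing only consumes as
# much of the underlying iterable as needed, e.g.
#   LazyList(itertools.count())[:3]  # -> [0, 1, 2], without exhausting count()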
2717
2718
2719 class PagedList:
2720
2721 class IndexError(IndexError):
2722 pass
2723
2724 def __len__(self):
2725 # This is only useful for tests
2726 return len(self.getslice())
2727
2728 def __init__(self, pagefunc, pagesize, use_cache=True):
2729 self._pagefunc = pagefunc
2730 self._pagesize = pagesize
2731 self._pagecount = float('inf')
2732 self._use_cache = use_cache
2733 self._cache = {}
2734
2735 def getpage(self, pagenum):
2736 page_results = self._cache.get(pagenum)
2737 if page_results is None:
2738 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2739 if self._use_cache:
2740 self._cache[pagenum] = page_results
2741 return page_results
2742
2743 def getslice(self, start=0, end=None):
2744 return list(self._getslice(start, end))
2745
2746 def _getslice(self, start, end):
2747 raise NotImplementedError('This method must be implemented by subclasses')
2748
2749 def __getitem__(self, idx):
2750 assert self._use_cache, 'Indexing PagedList requires cache'
2751 if not isinstance(idx, int) or idx < 0:
2752 raise TypeError('indices must be non-negative integers')
2753 entries = self.getslice(idx, idx + 1)
2754 if not entries:
2755 raise self.IndexError()
2756 return entries[0]
2757
2758
2759 class OnDemandPagedList(PagedList):
2760 """Download pages until a page with less than maximum results"""
2761
2762 def _getslice(self, start, end):
2763 for pagenum in itertools.count(start // self._pagesize):
2764 firstid = pagenum * self._pagesize
2765 nextfirstid = pagenum * self._pagesize + self._pagesize
2766 if start >= nextfirstid:
2767 continue
2768
2769 startv = (
2770 start % self._pagesize
2771 if firstid <= start < nextfirstid
2772 else 0)
2773 endv = (
2774 ((end - 1) % self._pagesize) + 1
2775 if (end is not None and firstid <= end <= nextfirstid)
2776 else None)
2777
2778 try:
2779 page_results = self.getpage(pagenum)
2780 except Exception:
2781 self._pagecount = pagenum - 1
2782 raise
2783 if startv != 0 or endv is not None:
2784 page_results = page_results[startv:endv]
2785 yield from page_results
2786
2787 # A little optimization - if the current page is not "full", i.e. does
2788 # not contain page_size videos, then we can assume that this page
2789 # is the last one - there are no more ids on further pages,
2790 # so there is no need to query again.
2791 if len(page_results) + startv < self._pagesize:
2792 break
2793
2794 # If we got the whole page, but the next page is not interesting,
2795 # break out early as well
2796 if end == nextfirstid:
2797 break
2798
2799
2800 class InAdvancePagedList(PagedList):
2801 """PagedList with total number of pages known in advance"""
2802
2803 def __init__(self, pagefunc, pagecount, pagesize):
2804 PagedList.__init__(self, pagefunc, pagesize, True)
2805 self._pagecount = pagecount
2806
2807 def _getslice(self, start, end):
2808 start_page = start // self._pagesize
2809 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2810 skip_elems = start - start_page * self._pagesize
2811 only_more = None if end is None else end - start
2812 for pagenum in range(start_page, end_page):
2813 page_results = self.getpage(pagenum)
2814 if skip_elems:
2815 page_results = page_results[skip_elems:]
2816 skip_elems = None
2817 if only_more is not None:
2818 if len(page_results) < only_more:
2819 only_more -= len(page_results)
2820 else:
2821 yield from page_results[:only_more]
2822 break
2823 yield from page_results
2824
2825
2826 class PlaylistEntries:
2827 MissingEntry = object()
2828 is_exhausted = False
2829
2830 def __init__(self, ydl, info_dict):
2831 self.ydl = ydl
2832
2833 # _entries must be assigned now since infodict can change during iteration
2834 entries = info_dict.get('entries')
2835 if entries is None:
2836 raise EntryNotInPlaylist('There are no entries')
2837 elif isinstance(entries, list):
2838 self.is_exhausted = True
2839
2840 requested_entries = info_dict.get('requested_entries')
2841 self.is_incomplete = bool(requested_entries)
2842 if self.is_incomplete:
2843 assert self.is_exhausted
2844 self._entries = [self.MissingEntry] * max(requested_entries)
2845 for i, entry in zip(requested_entries, entries):
2846 self._entries[i - 1] = entry
2847 elif isinstance(entries, (list, PagedList, LazyList)):
2848 self._entries = entries
2849 else:
2850 self._entries = LazyList(entries)
2851
2852 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2853 (?P<start>[+-]?\d+)?
2854 (?P<range>[:-]
2855 (?P<end>[+-]?\d+|inf(?:inite)?)?
2856 (?::(?P<step>[+-]?\d+))?
2857 )?''')
2858
2859 @classmethod
2860 def parse_playlist_items(cls, string):
2861 for segment in string.split(','):
2862 if not segment:
2863 raise ValueError('There are two or more consecutive commas')
2864 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2865 if not mobj:
2866 raise ValueError(f'{segment!r} is not a valid specification')
2867 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2868 if int_or_none(step) == 0:
2869 raise ValueError(f'Step in {segment!r} cannot be zero')
2870 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2871
2872 def get_requested_items(self):
2873 playlist_items = self.ydl.params.get('playlist_items')
2874 playlist_start = self.ydl.params.get('playliststart', 1)
2875 playlist_end = self.ydl.params.get('playlistend')
2876 # For backwards compatibility, interpret -1 as whole list
2877 if playlist_end in (-1, None):
2878 playlist_end = ''
2879 if not playlist_items:
2880 playlist_items = f'{playlist_start}:{playlist_end}'
2881 elif playlist_start != 1 or playlist_end:
2882 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2883
2884 for index in self.parse_playlist_items(playlist_items):
2885 for i, entry in self[index]:
2886 yield i, entry
2887 if not entry:
2888 continue
2889 try:
2890 # TODO: Add auto-generated fields
2891 self.ydl._match_entry(entry, incomplete=True, silent=True)
2892 except (ExistingVideoReached, RejectedVideoReached):
2893 return
2894
2895 def get_full_count(self):
2896 if self.is_exhausted and not self.is_incomplete:
2897 return len(self)
2898 elif isinstance(self._entries, InAdvancePagedList):
2899 if self._entries._pagesize == 1:
2900 return self._entries._pagecount
2901
2902 @functools.cached_property
2903 def _getter(self):
2904 if isinstance(self._entries, list):
2905 def get_entry(i):
2906 try:
2907 entry = self._entries[i]
2908 except IndexError:
2909 entry = self.MissingEntry
2910 if not self.is_incomplete:
2911 raise self.IndexError()
2912 if entry is self.MissingEntry:
2913 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2914 return entry
2915 else:
2916 def get_entry(i):
2917 try:
2918 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
2919 except (LazyList.IndexError, PagedList.IndexError):
2920 raise self.IndexError()
2921 return get_entry
2922
2923 def __getitem__(self, idx):
2924 if isinstance(idx, int):
2925 idx = slice(idx, idx)
2926
2927 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2928 step = 1 if idx.step is None else idx.step
2929 if idx.start is None:
2930 start = 0 if step > 0 else len(self) - 1
2931 else:
2932 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
2933
2934 # NB: Do not call len(self) when idx == [:]
2935 if idx.stop is None:
2936 stop = 0 if step < 0 else float('inf')
2937 else:
2938 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
2939 stop += [-1, 1][step > 0]
2940
2941 for i in frange(start, stop, step):
2942 if i < 0:
2943 continue
2944 try:
2945 entry = self._getter(i)
2946 except self.IndexError:
2947 self.is_exhausted = True
2948 if step > 0:
2949 break
2950 continue
2951 yield i + 1, entry
2952
2953 def __len__(self):
2954 return len(tuple(self[:]))
2955
2956 class IndexError(IndexError):
2957 pass
2958
2959
2960 def uppercase_escape(s):
2961 unicode_escape = codecs.getdecoder('unicode_escape')
2962 return re.sub(
2963 r'\\U[0-9a-fA-F]{8}',
2964 lambda m: unicode_escape(m.group(0))[0],
2965 s)
2966
2967
2968 def lowercase_escape(s):
2969 unicode_escape = codecs.getdecoder('unicode_escape')
2970 return re.sub(
2971 r'\\u[0-9a-fA-F]{4}',
2972 lambda m: unicode_escape(m.group(0))[0],
2973 s)
2974
2975
2976 def escape_rfc3986(s):
2977 """Escape non-ASCII characters as suggested by RFC 3986"""
2978 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2979
2980
2981 def escape_url(url):
2982 """Escape URL as suggested by RFC 3986"""
2983 url_parsed = urllib.parse.urlparse(url)
2984 return url_parsed._replace(
2985 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2986 path=escape_rfc3986(url_parsed.path),
2987 params=escape_rfc3986(url_parsed.params),
2988 query=escape_rfc3986(url_parsed.query),
2989 fragment=escape_rfc3986(url_parsed.fragment)
2990 ).geturl()
2991
2992
2993 def parse_qs(url):
2994 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
2995
2996
2997 def read_batch_urls(batch_fd):
2998 def fixup(url):
2999 if not isinstance(url, str):
3000 url = url.decode('utf-8', 'replace')
3001 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3002 for bom in BOM_UTF8:
3003 if url.startswith(bom):
3004 url = url[len(bom):]
3005 url = url.lstrip()
3006 if not url or url.startswith(('#', ';', ']')):
3007 return False
3008 # "#" cannot be stripped out since it is part of the URI
3009 # However, it can be safely stripped out if preceded by whitespace
3010 return re.split(r'\s#', url, 1)[0].rstrip()
3011
3012 with contextlib.closing(batch_fd) as fd:
3013 return [url for url in map(fixup, fd) if url]
3014
3015
3016 def urlencode_postdata(*args, **kargs):
3017 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3018
3019
3020 def update_url_query(url, query):
3021 if not query:
3022 return url
3023 parsed_url = urllib.parse.urlparse(url)
3024 qs = urllib.parse.parse_qs(parsed_url.query)
3025 qs.update(query)
3026 return urllib.parse.urlunparse(parsed_url._replace(
3027 query=urllib.parse.urlencode(qs, True)))
3028
3029
3030 def update_Request(req, url=None, data=None, headers=None, query=None):
3031 req_headers = req.headers.copy()
3032 req_headers.update(headers or {})
3033 req_data = data or req.data
3034 req_url = update_url_query(url or req.get_full_url(), query)
3035 req_get_method = req.get_method()
3036 if req_get_method == 'HEAD':
3037 req_type = HEADRequest
3038 elif req_get_method == 'PUT':
3039 req_type = PUTRequest
3040 else:
3041 req_type = urllib.request.Request
3042 new_req = req_type(
3043 req_url, data=req_data, headers=req_headers,
3044 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3045 if hasattr(req, 'timeout'):
3046 new_req.timeout = req.timeout
3047 return new_req
3048
3049
3050 def _multipart_encode_impl(data, boundary):
3051 content_type = 'multipart/form-data; boundary=%s' % boundary
3052
3053 out = b''
3054 for k, v in data.items():
3055 out += b'--' + boundary.encode('ascii') + b'\r\n'
3056 if isinstance(k, str):
3057 k = k.encode()
3058 if isinstance(v, str):
3059 v = v.encode()
3060 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3061 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3062 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3063 if boundary.encode('ascii') in content:
3064 raise ValueError('Boundary overlaps with data')
3065 out += content
3066
3067 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3068
3069 return out, content_type
3070
3071
3072 def multipart_encode(data, boundary=None):
3073 '''
3074 Encode a dict to RFC 7578-compliant form-data
3075
3076 data:
3077 A dict where keys and values can be either Unicode or bytes-like
3078 objects.
3079 boundary:
3080 If specified, a Unicode string used as the boundary. Otherwise
3081 a random boundary is generated.
3082
3083 Reference: https://tools.ietf.org/html/rfc7578
3084 '''
3085 has_specified_boundary = boundary is not None
3086
3087 while True:
3088 if boundary is None:
3089 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3090
3091 try:
3092 out, content_type = _multipart_encode_impl(data, boundary)
3093 break
3094 except ValueError:
3095 if has_specified_boundary:
3096 raise
3097 boundary = None
3098
3099 return out, content_type
3100
3101
3102 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3103 for val in map(d.get, variadic(key_or_keys)):
3104 if val is not None and (val or not skip_false_values):
3105 return val
3106 return default
3107
3108
3109 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3110 for f in funcs:
3111 try:
3112 val = f(*args, **kwargs)
3113 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
3114 pass
3115 else:
3116 if expected_type is None or isinstance(val, expected_type):
3117 return val
3118
3119
3120 def try_get(src, getter, expected_type=None):
3121 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3122
3123
3124 def filter_dict(dct, cndn=lambda _, v: v is not None):
3125 return {k: v for k, v in dct.items() if cndn(k, v)}
3126
3127
3128 def merge_dicts(*dicts):
3129 merged = {}
3130 for a_dict in dicts:
3131 for k, v in a_dict.items():
3132 if (v is not None and k not in merged
3133 or isinstance(v, str) and merged[k] == ''):
3134 merged[k] = v
3135 return merged
3136
3137
3138 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3139 return string if isinstance(string, str) else str(string, encoding, errors)
3140
3141
3142 US_RATINGS = {
3143 'G': 0,
3144 'PG': 10,
3145 'PG-13': 13,
3146 'R': 16,
3147 'NC': 18,
3148 }
3149
3150
3151 TV_PARENTAL_GUIDELINES = {
3152 'TV-Y': 0,
3153 'TV-Y7': 7,
3154 'TV-G': 0,
3155 'TV-PG': 0,
3156 'TV-14': 14,
3157 'TV-MA': 17,
3158 }
3159
3160
3161 def parse_age_limit(s):
3162 # isinstance(False, int) is True. So type() must be used instead
3163 if type(s) is int: # noqa: E721
3164 return s if 0 <= s <= 21 else None
3165 elif not isinstance(s, str):
3166 return None
3167 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3168 if m:
3169 return int(m.group('age'))
3170 s = s.upper()
3171 if s in US_RATINGS:
3172 return US_RATINGS[s]
3173 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3174 if m:
3175 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3176 return None
3177
3178
3179 def strip_jsonp(code):
3180 return re.sub(
3181 r'''(?sx)^
3182 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3183 (?:\s*&&\s*(?P=func_name))?
3184 \s*\(\s*(?P<callback_data>.*)\);?
3185 \s*?(?://[^\n]*)*$''',
3186 r'\g<callback_data>', code)
3187
3188
3189 def js_to_json(code, vars={}):
3190 # vars is a dict of var, val pairs to substitute
3191 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3192 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3193 INTEGER_TABLE = (
3194 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3195 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3196 )
3197
3198 def fix_kv(m):
3199 v = m.group(0)
3200 if v in ('true', 'false', 'null'):
3201 return v
3202 elif v in ('undefined', 'void 0'):
3203 return 'null'
3204 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3205 return ""
3206
3207 if v[0] in ("'", '"'):
3208 v = re.sub(r'(?s)\\.|"', lambda m: {
3209 '"': '\\"',
3210 "\\'": "'",
3211 '\\\n': '',
3212 '\\x': '\\u00',
3213 }.get(m.group(0), m.group(0)), v[1:-1])
3214 else:
3215 for regex, base in INTEGER_TABLE:
3216 im = re.match(regex, v)
3217 if im:
3218 i = int(im.group(1), base)
3219 return '"%d":' % i if v.endswith(':') else '%d' % i
3220
3221 if v in vars:
3222 return vars[v]
3223
3224 return '"%s"' % v
3225
3226 def create_map(mobj):
3227 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3228
3229 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3230 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3231
3232 return re.sub(r'''(?sx)
3233 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3234 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3235 {comment}|,(?={skip}[\]}}])|
3236 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3237 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3238 [0-9]+(?={skip}:)|
3239 !+
3240 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
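# Illustrative sketch (example snippet is an assumption):
#   js_to_json("{abc: true, 'def': 0x10}")  # -> '{"abc": true, "def": 16}'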
3241
3242
3243 def qualities(quality_ids):
3244 """ Get a numeric quality value out of a list of possible values """
3245 def q(qid):
3246 try:
3247 return quality_ids.index(qid)
3248 except ValueError:
3249 return -1
3250 return q
3251
3252
3253 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3254
3255
3256 DEFAULT_OUTTMPL = {
3257 'default': '%(title)s [%(id)s].%(ext)s',
3258 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3259 }
3260 OUTTMPL_TYPES = {
3261 'chapter': None,
3262 'subtitle': None,
3263 'thumbnail': None,
3264 'description': 'description',
3265 'annotation': 'annotations.xml',
3266 'infojson': 'info.json',
3267 'link': None,
3268 'pl_video': None,
3269 'pl_thumbnail': None,
3270 'pl_description': 'description',
3271 'pl_infojson': 'info.json',
3272 }
3273
3274 # As of [1] format syntax is:
3275 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3276 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3277 STR_FORMAT_RE_TMPL = r'''(?x)
3278 (?<!%)(?P<prefix>(?:%%)*)
3279 %
3280 (?P<has_key>\((?P<key>{0})\))?
3281 (?P<format>
3282 (?P<conversion>[#0\-+ ]+)?
3283 (?P<min_width>\d+)?
3284 (?P<precision>\.\d+)?
3285 (?P<len_mod>[hlL])? # unused in python
3286 {1} # conversion type
3287 )
3288 '''
3289
3290
3291 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3292
3293
3294 def limit_length(s, length):
3295 """ Add ellipses to overly long strings """
3296 if s is None:
3297 return None
3298 ELLIPSES = '...'
3299 if len(s) > length:
3300 return s[:length - len(ELLIPSES)] + ELLIPSES
3301 return s
3302
3303
3304 def version_tuple(v):
3305 return tuple(int(e) for e in re.split(r'[-.]', v))
3306
3307
3308 def is_outdated_version(version, limit, assume_new=True):
3309 if not version:
3310 return not assume_new
3311 try:
3312 return version_tuple(version) < version_tuple(limit)
3313 except ValueError:
3314 return not assume_new
3315
3316
3317 def ytdl_is_updateable():
3318 """ Returns if yt-dlp can be updated with -U """
3319
3320 from .update import is_non_updateable
3321
3322 return not is_non_updateable()
3323
3324
3325 def args_to_str(args):
3326 # Get a short string representation for a subprocess command
3327 return ' '.join(compat_shlex_quote(a) for a in args)
3328
3329
3330 def error_to_compat_str(err):
3331 return str(err)
3332
3333
3334 def error_to_str(err):
3335 return f'{type(err).__name__}: {err}'
3336
3337
3338 def mimetype2ext(mt):
3339 if mt is None:
3340 return None
3341
3342 mt, _, params = mt.partition(';')
3343 mt = mt.strip()
3344
3345 FULL_MAP = {
3346 'audio/mp4': 'm4a',
3347 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3348 # since it is the most common one
3349 'audio/mpeg': 'mp3',
3350 'audio/x-wav': 'wav',
3351 'audio/wav': 'wav',
3352 'audio/wave': 'wav',
3353 }
3354
3355 ext = FULL_MAP.get(mt)
3356 if ext is not None:
3357 return ext
3358
3359 SUBTYPE_MAP = {
3360 '3gpp': '3gp',
3361 'smptett+xml': 'tt',
3362 'ttaf+xml': 'dfxp',
3363 'ttml+xml': 'ttml',
3364 'x-flv': 'flv',
3365 'x-mp4-fragmented': 'mp4',
3366 'x-ms-sami': 'sami',
3367 'x-ms-wmv': 'wmv',
3368 'mpegurl': 'm3u8',
3369 'x-mpegurl': 'm3u8',
3370 'vnd.apple.mpegurl': 'm3u8',
3371 'dash+xml': 'mpd',
3372 'f4m+xml': 'f4m',
3373 'hds+xml': 'f4m',
3374 'vnd.ms-sstr+xml': 'ism',
3375 'quicktime': 'mov',
3376 'mp2t': 'ts',
3377 'x-wav': 'wav',
3378 'filmstrip+json': 'fs',
3379 'svg+xml': 'svg',
3380 }
3381
3382 _, _, subtype = mt.rpartition('/')
3383 ext = SUBTYPE_MAP.get(subtype.lower())
3384 if ext is not None:
3385 return ext
3386
3387 SUFFIX_MAP = {
3388 'json': 'json',
3389 'xml': 'xml',
3390 'zip': 'zip',
3391 'gzip': 'gz',
3392 }
3393
3394 _, _, suffix = subtype.partition('+')
3395 ext = SUFFIX_MAP.get(suffix)
3396 if ext is not None:
3397 return ext
3398
3399 return subtype.replace('+', '.')
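# Illustrative sketch (example MIME types are assumptions):
#   mimetype2ext('audio/x-wav')                          # -> 'wav'
#   mimetype2ext('application/dash+xml; charset=UTF-8')  # -> 'mpd'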
3400
3401
3402 def ext2mimetype(ext_or_url):
3403 if not ext_or_url:
3404 return None
3405 if '.' not in ext_or_url:
3406 ext_or_url = f'file.{ext_or_url}'
3407 return mimetypes.guess_type(ext_or_url)[0]
3408
3409
3410 def parse_codecs(codecs_str):
3411 # http://tools.ietf.org/html/rfc6381
3412 if not codecs_str:
3413 return {}
3414 split_codecs = list(filter(None, map(
3415 str.strip, codecs_str.strip().strip(',').split(','))))
3416 vcodec, acodec, scodec, hdr = None, None, None, None
3417 for full_codec in split_codecs:
3418 parts = full_codec.split('.')
3419 codec = parts[0].replace('0', '')
3420 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3421 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3422 if not vcodec:
3423 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3424 if codec in ('dvh1', 'dvhe'):
3425 hdr = 'DV'
3426 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3427 hdr = 'HDR10'
3428 elif full_codec.replace('0', '').startswith('vp9.2'):
3429 hdr = 'HDR10'
3430 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3431 if not acodec:
3432 acodec = full_codec
3433 elif codec in ('stpp', 'wvtt',):
3434 if not scodec:
3435 scodec = full_codec
3436 else:
3437 write_string(f'WARNING: Unknown codec {full_codec}\n')
3438 if vcodec or acodec or scodec:
3439 return {
3440 'vcodec': vcodec or 'none',
3441 'acodec': acodec or 'none',
3442 'dynamic_range': hdr,
3443 **({'scodec': scodec} if scodec is not None else {}),
3444 }
3445 elif len(split_codecs) == 2:
3446 return {
3447 'vcodec': split_codecs[0],
3448 'acodec': split_codecs[1],
3449 }
3450 return {}
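# Illustrative sketch (example codecs string is an assumption):
#   parse_codecs('avc1.42001e, mp4a.40.2')
#   # -> {'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.2', 'dynamic_range': None}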
3451
3452
3453 def urlhandle_detect_ext(url_handle):
3454 getheader = url_handle.headers.get
3455
3456 cd = getheader('Content-Disposition')
3457 if cd:
3458 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3459 if m:
3460 e = determine_ext(m.group('filename'), default_ext=None)
3461 if e:
3462 return e
3463
3464 return mimetype2ext(getheader('Content-Type'))
3465
3466
3467 def encode_data_uri(data, mime_type):
3468 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3469
3470
3471 def age_restricted(content_limit, age_limit):
3472 """ Returns True iff the content should be blocked """
3473
3474 if age_limit is None: # No limit set
3475 return False
3476 if content_limit is None:
3477 return False # Content available for everyone
3478 return age_limit < content_limit
3479
3480
3481 def is_html(first_bytes):
3482 """ Detect whether a file contains HTML by examining its first bytes. """
3483
3484 BOMS = [
3485 (b'\xef\xbb\xbf', 'utf-8'),
3486 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3487 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3488 (b'\xff\xfe', 'utf-16-le'),
3489 (b'\xfe\xff', 'utf-16-be'),
3490 ]
3491
3492 encoding = 'utf-8'
3493 for bom, enc in BOMS:
3494 while first_bytes.startswith(bom):
3495 encoding, first_bytes = enc, first_bytes[len(bom):]
3496
3497 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3498
3499
3500 def determine_protocol(info_dict):
3501 protocol = info_dict.get('protocol')
3502 if protocol is not None:
3503 return protocol
3504
3505 url = sanitize_url(info_dict['url'])
3506 if url.startswith('rtmp'):
3507 return 'rtmp'
3508 elif url.startswith('mms'):
3509 return 'mms'
3510 elif url.startswith('rtsp'):
3511 return 'rtsp'
3512
3513 ext = determine_ext(url)
3514 if ext == 'm3u8':
3515 return 'm3u8'
3516 elif ext == 'f4m':
3517 return 'f4m'
3518
3519 return urllib.parse.urlparse(url).scheme
3520
3521
3522 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3523 """ Render a list of rows, each as a list of values.
3524 Text after a \t will be right aligned """
3525 def width(string):
3526 return len(remove_terminal_sequences(string).replace('\t', ''))
3527
3528 def get_max_lens(table):
3529 return [max(width(str(v)) for v in col) for col in zip(*table)]
3530
3531 def filter_using_list(row, filterArray):
3532 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3533
3534 max_lens = get_max_lens(data) if hide_empty else []
3535 header_row = filter_using_list(header_row, max_lens)
3536 data = [filter_using_list(row, max_lens) for row in data]
3537
3538 table = [header_row] + data
3539 max_lens = get_max_lens(table)
3540 extra_gap += 1
3541 if delim:
3542 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3543 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3544 for row in table:
3545 for pos, text in enumerate(map(str, row)):
3546 if '\t' in text:
3547 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3548 else:
3549 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3550 ret = '\n'.join(''.join(row).rstrip() for row in table)
3551 return ret
3552
3553
3554 def _match_one(filter_part, dct, incomplete):
3555 # TODO: Generalize code with YoutubeDL._build_format_filter
3556 STRING_OPERATORS = {
3557 '*=': operator.contains,
3558 '^=': lambda attr, value: attr.startswith(value),
3559 '$=': lambda attr, value: attr.endswith(value),
3560 '~=': lambda attr, value: re.search(value, attr),
3561 }
3562 COMPARISON_OPERATORS = {
3563 **STRING_OPERATORS,
3564 '<=': operator.le, # "<=" must be defined above "<"
3565 '<': operator.lt,
3566 '>=': operator.ge,
3567 '>': operator.gt,
3568 '=': operator.eq,
3569 }
3570
3571 if isinstance(incomplete, bool):
3572 is_incomplete = lambda _: incomplete
3573 else:
3574 is_incomplete = lambda k: k in incomplete
3575
3576 operator_rex = re.compile(r'''(?x)
3577 (?P<key>[a-z_]+)
3578 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3579 (?:
3580 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3581 (?P<strval>.+?)
3582 )
3583 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3584 m = operator_rex.fullmatch(filter_part.strip())
3585 if m:
3586 m = m.groupdict()
3587 unnegated_op = COMPARISON_OPERATORS[m['op']]
3588 if m['negation']:
3589 op = lambda attr, value: not unnegated_op(attr, value)
3590 else:
3591 op = unnegated_op
3592 comparison_value = m['quotedstrval'] or m['strval']
3593 if m['quote']:
3594 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3595 actual_value = dct.get(m['key'])
3596 numeric_comparison = None
3597 if isinstance(actual_value, (int, float)):
3598 # If the original field is a string and the matching comparison value is
3599 # a number, we should respect the origin of the original field and
3600 # process the comparison value as a string (see
3601 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3602 try:
3603 numeric_comparison = int(comparison_value)
3604 except ValueError:
3605 numeric_comparison = parse_filesize(comparison_value)
3606 if numeric_comparison is None:
3607 numeric_comparison = parse_filesize(f'{comparison_value}B')
3608 if numeric_comparison is None:
3609 numeric_comparison = parse_duration(comparison_value)
3610 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3611 raise ValueError('Operator %s only supports string values!' % m['op'])
3612 if actual_value is None:
3613 return is_incomplete(m['key']) or m['none_inclusive']
3614 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3615
3616 UNARY_OPERATORS = {
3617 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3618 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3619 }
3620 operator_rex = re.compile(r'''(?x)
3621 (?P<op>%s)\s*(?P<key>[a-z_]+)
3622 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3623 m = operator_rex.fullmatch(filter_part.strip())
3624 if m:
3625 op = UNARY_OPERATORS[m.group('op')]
3626 actual_value = dct.get(m.group('key'))
3627 if is_incomplete(m.group('key')) and actual_value is None:
3628 return True
3629 return op(actual_value)
3630
3631 raise ValueError('Invalid filter part %r' % filter_part)
3632
3633
3634 def match_str(filter_str, dct, incomplete=False):
3635 """ Filter a dictionary with a simple string syntax.
3636 @returns Whether the filter passes
3637 @param incomplete Set of keys that is expected to be missing from dct.
3638 Can be True/False to indicate all/none of the keys may be missing.
3639 All conditions on incomplete keys pass if the key is missing
3640 """
3641 return all(
3642 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3643 for filter_part in re.split(r'(?<!\\)&', filter_str))
3644
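# Editor's illustration (sketch) of the filter syntax handled above:
# >>> match_str('duration > 600 & like_count >? 100', {'duration': 700})  # like_count is missing; '?' lets it pass
# True
# >>> match_str('!is_live & title ~= (?i)fragment', {'is_live': False, 'title': 'Fragment One'})
# True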
3645
3646 def match_filter_func(filters):
3647 if not filters:
3648 return None
3649 filters = set(variadic(filters))
3650
3651 interactive = '-' in filters
3652 if interactive:
3653 filters.remove('-')
3654
3655 def _match_func(info_dict, incomplete=False):
3656 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3657 return NO_DEFAULT if interactive and not incomplete else None
3658 else:
3659 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3660 filter_str = ') | ('.join(map(str.strip, filters))
3661 return f'{video_title} does not pass filter ({filter_str}), skipping ...'
3662 return _match_func
3663
3664
3665 def download_range_func(chapters, ranges):
3666 def inner(info_dict, ydl):
3667 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3668 else 'Cannot match chapters since chapter information is unavailable')
3669 for regex in chapters or []:
3670 for i, chapter in enumerate(info_dict.get('chapters') or []):
3671 if re.search(regex, chapter['title']):
3672 warning = None
3673 yield {**chapter, 'index': i}
3674 if chapters and warning:
3675 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3676
3677 yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
3678
3679 return inner
3680
3681
3682 def parse_dfxp_time_expr(time_expr):
3683 if not time_expr:
3684 return
3685
3686 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3687 if mobj:
3688 return float(mobj.group('time_offset'))
3689
3690 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3691 if mobj:
3692 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3693
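# Editor's illustration (sketch):
# >>> parse_dfxp_time_expr('5.2s')
# 5.2
# >>> parse_dfxp_time_expr('00:01:02.5')
# 62.5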
3694
3695 def srt_subtitles_timecode(seconds):
3696 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3697
3698
3699 def ass_subtitles_timecode(seconds):
3700 time = timetuple_from_msec(seconds * 1000)
3701 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3702
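# Editor's illustration (sketch) of the two timecode formats:
# >>> srt_subtitles_timecode(61.5)
# '00:01:01,500'
# >>> ass_subtitles_timecode(61.5)
# '0:01:01.50'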
3703
3704 def dfxp2srt(dfxp_data):
3705 '''
3706 @param dfxp_data A bytes-like object containing DFXP data
3707 @returns A unicode object containing converted SRT data
3708 '''
3709 LEGACY_NAMESPACES = (
3710 (b'http://www.w3.org/ns/ttml', [
3711 b'http://www.w3.org/2004/11/ttaf1',
3712 b'http://www.w3.org/2006/04/ttaf1',
3713 b'http://www.w3.org/2006/10/ttaf1',
3714 ]),
3715 (b'http://www.w3.org/ns/ttml#styling', [
3716 b'http://www.w3.org/ns/ttml#style',
3717 ]),
3718 )
3719
3720 SUPPORTED_STYLING = [
3721 'color',
3722 'fontFamily',
3723 'fontSize',
3724 'fontStyle',
3725 'fontWeight',
3726 'textDecoration'
3727 ]
3728
3729 _x = functools.partial(xpath_with_ns, ns_map={
3730 'xml': 'http://www.w3.org/XML/1998/namespace',
3731 'ttml': 'http://www.w3.org/ns/ttml',
3732 'tts': 'http://www.w3.org/ns/ttml#styling',
3733 })
3734
3735 styles = {}
3736 default_style = {}
3737
3738 class TTMLPElementParser:
3739 def __init__(self):  # instance attributes; mutable class attributes would be shared between parser instances
3740 self._out, self._unclosed_elements = '', []
3741 self._applied_styles = []
3742
3743 def start(self, tag, attrib):
3744 if tag in (_x('ttml:br'), 'br'):
3745 self._out += '\n'
3746 else:
3747 unclosed_elements = []
3748 style = {}
3749 element_style_id = attrib.get('style')
3750 if default_style:
3751 style.update(default_style)
3752 if element_style_id:
3753 style.update(styles.get(element_style_id, {}))
3754 for prop in SUPPORTED_STYLING:
3755 prop_val = attrib.get(_x('tts:' + prop))
3756 if prop_val:
3757 style[prop] = prop_val
3758 if style:
3759 font = ''
3760 for k, v in sorted(style.items()):
3761 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3762 continue
3763 if k == 'color':
3764 font += ' color="%s"' % v
3765 elif k == 'fontSize':
3766 font += ' size="%s"' % v
3767 elif k == 'fontFamily':
3768 font += ' face="%s"' % v
3769 elif k == 'fontWeight' and v == 'bold':
3770 self._out += '<b>'
3771 unclosed_elements.append('b')
3772 elif k == 'fontStyle' and v == 'italic':
3773 self._out += '<i>'
3774 unclosed_elements.append('i')
3775 elif k == 'textDecoration' and v == 'underline':
3776 self._out += '<u>'
3777 unclosed_elements.append('u')
3778 if font:
3779 self._out += '<font' + font + '>'
3780 unclosed_elements.append('font')
3781 applied_style = {}
3782 if self._applied_styles:
3783 applied_style.update(self._applied_styles[-1])
3784 applied_style.update(style)
3785 self._applied_styles.append(applied_style)
3786 self._unclosed_elements.append(unclosed_elements)
3787
3788 def end(self, tag):
3789 if tag not in (_x('ttml:br'), 'br'):
3790 unclosed_elements = self._unclosed_elements.pop()
3791 for element in reversed(unclosed_elements):
3792 self._out += '</%s>' % element
3793 if unclosed_elements and self._applied_styles:
3794 self._applied_styles.pop()
3795
3796 def data(self, data):
3797 self._out += data
3798
3799 def close(self):
3800 return self._out.strip()
3801
3802 def parse_node(node):
3803 target = TTMLPElementParser()
3804 parser = xml.etree.ElementTree.XMLParser(target=target)
3805 parser.feed(xml.etree.ElementTree.tostring(node))
3806 return parser.close()
3807
3808 for k, v in LEGACY_NAMESPACES:
3809 for ns in v:
3810 dfxp_data = dfxp_data.replace(ns, k)
3811
3812 dfxp = compat_etree_fromstring(dfxp_data)
3813 out = []
3814 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3815
3816 if not paras:
3817 raise ValueError('Invalid dfxp/TTML subtitle')
3818
3819 repeat = False
3820 while True:
3821 for style in dfxp.findall(_x('.//ttml:style')):
3822 style_id = style.get('id') or style.get(_x('xml:id'))
3823 if not style_id:
3824 continue
3825 parent_style_id = style.get('style')
3826 if parent_style_id:
3827 if parent_style_id not in styles:
3828 repeat = True
3829 continue
3830 styles[style_id] = styles[parent_style_id].copy()
3831 for prop in SUPPORTED_STYLING:
3832 prop_val = style.get(_x('tts:' + prop))
3833 if prop_val:
3834 styles.setdefault(style_id, {})[prop] = prop_val
3835 if repeat:
3836 repeat = False
3837 else:
3838 break
3839
3840 for p in ('body', 'div'):
3841 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3842 if ele is None:
3843 continue
3844 style = styles.get(ele.get('style'))
3845 if not style:
3846 continue
3847 default_style.update(style)
3848
3849 for para, index in zip(paras, itertools.count(1)):
3850 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3851 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3852 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3853 if begin_time is None:
3854 continue
3855 if not end_time:
3856 if not dur:
3857 continue
3858 end_time = begin_time + dur
3859 out.append('%d\n%s --> %s\n%s\n\n' % (
3860 index,
3861 srt_subtitles_timecode(begin_time),
3862 srt_subtitles_timecode(end_time),
3863 parse_node(para)))
3864
3865 return ''.join(out)
3866
3867
3868 def cli_option(params, command_option, param, separator=None):
3869 param = params.get(param)
3870 return ([] if param is None
3871 else [command_option, str(param)] if separator is None
3872 else [f'{command_option}{separator}{param}'])
3873
3874
3875 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3876 param = params.get(param)
3877 assert param in (True, False, None)
3878 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3879
3880
3881 def cli_valueless_option(params, command_option, param, expected_value=True):
3882 return [command_option] if params.get(param) == expected_value else []
3883
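# Editor's illustration (sketch) of the three cli_* helpers above; the params
# dict keys ('proxy', 'check_cert', 'quiet') are hypothetical:
# >>> cli_option({'proxy': 'http://127.0.0.1:3128'}, '--proxy', 'proxy')
# ['--proxy', 'http://127.0.0.1:3128']
# >>> cli_bool_option({'check_cert': True}, '--check-certificate', 'check_cert', separator='=')
# ['--check-certificate=true']
# >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
# ['--quiet']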
3884
3885 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3886 if isinstance(argdict, (list, tuple)): # for backward compatibility
3887 if use_compat:
3888 return argdict
3889 else:
3890 argdict = None
3891 if argdict is None:
3892 return default
3893 assert isinstance(argdict, dict)
3894
3895 assert isinstance(keys, (list, tuple))
3896 for key_list in keys:
3897 arg_list = list(filter(
3898 lambda x: x is not None,
3899 [argdict.get(key.lower()) for key in variadic(key_list)]))
3900 if arg_list:
3901 return [arg for args in arg_list for arg in args]
3902 return default
3903
3904
3905 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3906 main_key, exe = main_key.lower(), exe.lower()
3907 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3908 keys = [f'{root_key}{k}' for k in (keys or [''])]
3909 if root_key in keys:
3910 if main_key != exe:
3911 keys.append((main_key, exe))
3912 keys.append('default')
3913 else:
3914 use_compat = False
3915 return cli_configuration_args(argdict, keys, default, use_compat)
3916
3917
3918 class ISO639Utils:
3919 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3920 _lang_map = {
3921 'aa': 'aar',
3922 'ab': 'abk',
3923 'ae': 'ave',
3924 'af': 'afr',
3925 'ak': 'aka',
3926 'am': 'amh',
3927 'an': 'arg',
3928 'ar': 'ara',
3929 'as': 'asm',
3930 'av': 'ava',
3931 'ay': 'aym',
3932 'az': 'aze',
3933 'ba': 'bak',
3934 'be': 'bel',
3935 'bg': 'bul',
3936 'bh': 'bih',
3937 'bi': 'bis',
3938 'bm': 'bam',
3939 'bn': 'ben',
3940 'bo': 'bod',
3941 'br': 'bre',
3942 'bs': 'bos',
3943 'ca': 'cat',
3944 'ce': 'che',
3945 'ch': 'cha',
3946 'co': 'cos',
3947 'cr': 'cre',
3948 'cs': 'ces',
3949 'cu': 'chu',
3950 'cv': 'chv',
3951 'cy': 'cym',
3952 'da': 'dan',
3953 'de': 'deu',
3954 'dv': 'div',
3955 'dz': 'dzo',
3956 'ee': 'ewe',
3957 'el': 'ell',
3958 'en': 'eng',
3959 'eo': 'epo',
3960 'es': 'spa',
3961 'et': 'est',
3962 'eu': 'eus',
3963 'fa': 'fas',
3964 'ff': 'ful',
3965 'fi': 'fin',
3966 'fj': 'fij',
3967 'fo': 'fao',
3968 'fr': 'fra',
3969 'fy': 'fry',
3970 'ga': 'gle',
3971 'gd': 'gla',
3972 'gl': 'glg',
3973 'gn': 'grn',
3974 'gu': 'guj',
3975 'gv': 'glv',
3976 'ha': 'hau',
3977 'he': 'heb',
3978 'iw': 'heb', # Replaced by he in 1989 revision
3979 'hi': 'hin',
3980 'ho': 'hmo',
3981 'hr': 'hrv',
3982 'ht': 'hat',
3983 'hu': 'hun',
3984 'hy': 'hye',
3985 'hz': 'her',
3986 'ia': 'ina',
3987 'id': 'ind',
3988 'in': 'ind', # Replaced by id in 1989 revision
3989 'ie': 'ile',
3990 'ig': 'ibo',
3991 'ii': 'iii',
3992 'ik': 'ipk',
3993 'io': 'ido',
3994 'is': 'isl',
3995 'it': 'ita',
3996 'iu': 'iku',
3997 'ja': 'jpn',
3998 'jv': 'jav',
3999 'ka': 'kat',
4000 'kg': 'kon',
4001 'ki': 'kik',
4002 'kj': 'kua',
4003 'kk': 'kaz',
4004 'kl': 'kal',
4005 'km': 'khm',
4006 'kn': 'kan',
4007 'ko': 'kor',
4008 'kr': 'kau',
4009 'ks': 'kas',
4010 'ku': 'kur',
4011 'kv': 'kom',
4012 'kw': 'cor',
4013 'ky': 'kir',
4014 'la': 'lat',
4015 'lb': 'ltz',
4016 'lg': 'lug',
4017 'li': 'lim',
4018 'ln': 'lin',
4019 'lo': 'lao',
4020 'lt': 'lit',
4021 'lu': 'lub',
4022 'lv': 'lav',
4023 'mg': 'mlg',
4024 'mh': 'mah',
4025 'mi': 'mri',
4026 'mk': 'mkd',
4027 'ml': 'mal',
4028 'mn': 'mon',
4029 'mr': 'mar',
4030 'ms': 'msa',
4031 'mt': 'mlt',
4032 'my': 'mya',
4033 'na': 'nau',
4034 'nb': 'nob',
4035 'nd': 'nde',
4036 'ne': 'nep',
4037 'ng': 'ndo',
4038 'nl': 'nld',
4039 'nn': 'nno',
4040 'no': 'nor',
4041 'nr': 'nbl',
4042 'nv': 'nav',
4043 'ny': 'nya',
4044 'oc': 'oci',
4045 'oj': 'oji',
4046 'om': 'orm',
4047 'or': 'ori',
4048 'os': 'oss',
4049 'pa': 'pan',
4050 'pi': 'pli',
4051 'pl': 'pol',
4052 'ps': 'pus',
4053 'pt': 'por',
4054 'qu': 'que',
4055 'rm': 'roh',
4056 'rn': 'run',
4057 'ro': 'ron',
4058 'ru': 'rus',
4059 'rw': 'kin',
4060 'sa': 'san',
4061 'sc': 'srd',
4062 'sd': 'snd',
4063 'se': 'sme',
4064 'sg': 'sag',
4065 'si': 'sin',
4066 'sk': 'slk',
4067 'sl': 'slv',
4068 'sm': 'smo',
4069 'sn': 'sna',
4070 'so': 'som',
4071 'sq': 'sqi',
4072 'sr': 'srp',
4073 'ss': 'ssw',
4074 'st': 'sot',
4075 'su': 'sun',
4076 'sv': 'swe',
4077 'sw': 'swa',
4078 'ta': 'tam',
4079 'te': 'tel',
4080 'tg': 'tgk',
4081 'th': 'tha',
4082 'ti': 'tir',
4083 'tk': 'tuk',
4084 'tl': 'tgl',
4085 'tn': 'tsn',
4086 'to': 'ton',
4087 'tr': 'tur',
4088 'ts': 'tso',
4089 'tt': 'tat',
4090 'tw': 'twi',
4091 'ty': 'tah',
4092 'ug': 'uig',
4093 'uk': 'ukr',
4094 'ur': 'urd',
4095 'uz': 'uzb',
4096 've': 'ven',
4097 'vi': 'vie',
4098 'vo': 'vol',
4099 'wa': 'wln',
4100 'wo': 'wol',
4101 'xh': 'xho',
4102 'yi': 'yid',
4103 'ji': 'yid', # Replaced by yi in 1989 revision
4104 'yo': 'yor',
4105 'za': 'zha',
4106 'zh': 'zho',
4107 'zu': 'zul',
4108 }
4109
4110 @classmethod
4111 def short2long(cls, code):
4112 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4113 return cls._lang_map.get(code[:2])
4114
4115 @classmethod
4116 def long2short(cls, code):
4117 """Convert language code from ISO 639-2/T to ISO 639-1"""
4118 for short_name, long_name in cls._lang_map.items():
4119 if long_name == code:
4120 return short_name
4121
4122
4123 class ISO3166Utils:
4124 # From http://data.okfn.org/data/core/country-list
4125 _country_map = {
4126 'AF': 'Afghanistan',
4127 'AX': 'Åland Islands',
4128 'AL': 'Albania',
4129 'DZ': 'Algeria',
4130 'AS': 'American Samoa',
4131 'AD': 'Andorra',
4132 'AO': 'Angola',
4133 'AI': 'Anguilla',
4134 'AQ': 'Antarctica',
4135 'AG': 'Antigua and Barbuda',
4136 'AR': 'Argentina',
4137 'AM': 'Armenia',
4138 'AW': 'Aruba',
4139 'AU': 'Australia',
4140 'AT': 'Austria',
4141 'AZ': 'Azerbaijan',
4142 'BS': 'Bahamas',
4143 'BH': 'Bahrain',
4144 'BD': 'Bangladesh',
4145 'BB': 'Barbados',
4146 'BY': 'Belarus',
4147 'BE': 'Belgium',
4148 'BZ': 'Belize',
4149 'BJ': 'Benin',
4150 'BM': 'Bermuda',
4151 'BT': 'Bhutan',
4152 'BO': 'Bolivia, Plurinational State of',
4153 'BQ': 'Bonaire, Sint Eustatius and Saba',
4154 'BA': 'Bosnia and Herzegovina',
4155 'BW': 'Botswana',
4156 'BV': 'Bouvet Island',
4157 'BR': 'Brazil',
4158 'IO': 'British Indian Ocean Territory',
4159 'BN': 'Brunei Darussalam',
4160 'BG': 'Bulgaria',
4161 'BF': 'Burkina Faso',
4162 'BI': 'Burundi',
4163 'KH': 'Cambodia',
4164 'CM': 'Cameroon',
4165 'CA': 'Canada',
4166 'CV': 'Cape Verde',
4167 'KY': 'Cayman Islands',
4168 'CF': 'Central African Republic',
4169 'TD': 'Chad',
4170 'CL': 'Chile',
4171 'CN': 'China',
4172 'CX': 'Christmas Island',
4173 'CC': 'Cocos (Keeling) Islands',
4174 'CO': 'Colombia',
4175 'KM': 'Comoros',
4176 'CG': 'Congo',
4177 'CD': 'Congo, the Democratic Republic of the',
4178 'CK': 'Cook Islands',
4179 'CR': 'Costa Rica',
4180 'CI': 'Côte d\'Ivoire',
4181 'HR': 'Croatia',
4182 'CU': 'Cuba',
4183 'CW': 'Curaçao',
4184 'CY': 'Cyprus',
4185 'CZ': 'Czech Republic',
4186 'DK': 'Denmark',
4187 'DJ': 'Djibouti',
4188 'DM': 'Dominica',
4189 'DO': 'Dominican Republic',
4190 'EC': 'Ecuador',
4191 'EG': 'Egypt',
4192 'SV': 'El Salvador',
4193 'GQ': 'Equatorial Guinea',
4194 'ER': 'Eritrea',
4195 'EE': 'Estonia',
4196 'ET': 'Ethiopia',
4197 'FK': 'Falkland Islands (Malvinas)',
4198 'FO': 'Faroe Islands',
4199 'FJ': 'Fiji',
4200 'FI': 'Finland',
4201 'FR': 'France',
4202 'GF': 'French Guiana',
4203 'PF': 'French Polynesia',
4204 'TF': 'French Southern Territories',
4205 'GA': 'Gabon',
4206 'GM': 'Gambia',
4207 'GE': 'Georgia',
4208 'DE': 'Germany',
4209 'GH': 'Ghana',
4210 'GI': 'Gibraltar',
4211 'GR': 'Greece',
4212 'GL': 'Greenland',
4213 'GD': 'Grenada',
4214 'GP': 'Guadeloupe',
4215 'GU': 'Guam',
4216 'GT': 'Guatemala',
4217 'GG': 'Guernsey',
4218 'GN': 'Guinea',
4219 'GW': 'Guinea-Bissau',
4220 'GY': 'Guyana',
4221 'HT': 'Haiti',
4222 'HM': 'Heard Island and McDonald Islands',
4223 'VA': 'Holy See (Vatican City State)',
4224 'HN': 'Honduras',
4225 'HK': 'Hong Kong',
4226 'HU': 'Hungary',
4227 'IS': 'Iceland',
4228 'IN': 'India',
4229 'ID': 'Indonesia',
4230 'IR': 'Iran, Islamic Republic of',
4231 'IQ': 'Iraq',
4232 'IE': 'Ireland',
4233 'IM': 'Isle of Man',
4234 'IL': 'Israel',
4235 'IT': 'Italy',
4236 'JM': 'Jamaica',
4237 'JP': 'Japan',
4238 'JE': 'Jersey',
4239 'JO': 'Jordan',
4240 'KZ': 'Kazakhstan',
4241 'KE': 'Kenya',
4242 'KI': 'Kiribati',
4243 'KP': 'Korea, Democratic People\'s Republic of',
4244 'KR': 'Korea, Republic of',
4245 'KW': 'Kuwait',
4246 'KG': 'Kyrgyzstan',
4247 'LA': 'Lao People\'s Democratic Republic',
4248 'LV': 'Latvia',
4249 'LB': 'Lebanon',
4250 'LS': 'Lesotho',
4251 'LR': 'Liberia',
4252 'LY': 'Libya',
4253 'LI': 'Liechtenstein',
4254 'LT': 'Lithuania',
4255 'LU': 'Luxembourg',
4256 'MO': 'Macao',
4257 'MK': 'Macedonia, the Former Yugoslav Republic of',
4258 'MG': 'Madagascar',
4259 'MW': 'Malawi',
4260 'MY': 'Malaysia',
4261 'MV': 'Maldives',
4262 'ML': 'Mali',
4263 'MT': 'Malta',
4264 'MH': 'Marshall Islands',
4265 'MQ': 'Martinique',
4266 'MR': 'Mauritania',
4267 'MU': 'Mauritius',
4268 'YT': 'Mayotte',
4269 'MX': 'Mexico',
4270 'FM': 'Micronesia, Federated States of',
4271 'MD': 'Moldova, Republic of',
4272 'MC': 'Monaco',
4273 'MN': 'Mongolia',
4274 'ME': 'Montenegro',
4275 'MS': 'Montserrat',
4276 'MA': 'Morocco',
4277 'MZ': 'Mozambique',
4278 'MM': 'Myanmar',
4279 'NA': 'Namibia',
4280 'NR': 'Nauru',
4281 'NP': 'Nepal',
4282 'NL': 'Netherlands',
4283 'NC': 'New Caledonia',
4284 'NZ': 'New Zealand',
4285 'NI': 'Nicaragua',
4286 'NE': 'Niger',
4287 'NG': 'Nigeria',
4288 'NU': 'Niue',
4289 'NF': 'Norfolk Island',
4290 'MP': 'Northern Mariana Islands',
4291 'NO': 'Norway',
4292 'OM': 'Oman',
4293 'PK': 'Pakistan',
4294 'PW': 'Palau',
4295 'PS': 'Palestine, State of',
4296 'PA': 'Panama',
4297 'PG': 'Papua New Guinea',
4298 'PY': 'Paraguay',
4299 'PE': 'Peru',
4300 'PH': 'Philippines',
4301 'PN': 'Pitcairn',
4302 'PL': 'Poland',
4303 'PT': 'Portugal',
4304 'PR': 'Puerto Rico',
4305 'QA': 'Qatar',
4306 'RE': 'Réunion',
4307 'RO': 'Romania',
4308 'RU': 'Russian Federation',
4309 'RW': 'Rwanda',
4310 'BL': 'Saint Barthélemy',
4311 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4312 'KN': 'Saint Kitts and Nevis',
4313 'LC': 'Saint Lucia',
4314 'MF': 'Saint Martin (French part)',
4315 'PM': 'Saint Pierre and Miquelon',
4316 'VC': 'Saint Vincent and the Grenadines',
4317 'WS': 'Samoa',
4318 'SM': 'San Marino',
4319 'ST': 'Sao Tome and Principe',
4320 'SA': 'Saudi Arabia',
4321 'SN': 'Senegal',
4322 'RS': 'Serbia',
4323 'SC': 'Seychelles',
4324 'SL': 'Sierra Leone',
4325 'SG': 'Singapore',
4326 'SX': 'Sint Maarten (Dutch part)',
4327 'SK': 'Slovakia',
4328 'SI': 'Slovenia',
4329 'SB': 'Solomon Islands',
4330 'SO': 'Somalia',
4331 'ZA': 'South Africa',
4332 'GS': 'South Georgia and the South Sandwich Islands',
4333 'SS': 'South Sudan',
4334 'ES': 'Spain',
4335 'LK': 'Sri Lanka',
4336 'SD': 'Sudan',
4337 'SR': 'Suriname',
4338 'SJ': 'Svalbard and Jan Mayen',
4339 'SZ': 'Swaziland',
4340 'SE': 'Sweden',
4341 'CH': 'Switzerland',
4342 'SY': 'Syrian Arab Republic',
4343 'TW': 'Taiwan, Province of China',
4344 'TJ': 'Tajikistan',
4345 'TZ': 'Tanzania, United Republic of',
4346 'TH': 'Thailand',
4347 'TL': 'Timor-Leste',
4348 'TG': 'Togo',
4349 'TK': 'Tokelau',
4350 'TO': 'Tonga',
4351 'TT': 'Trinidad and Tobago',
4352 'TN': 'Tunisia',
4353 'TR': 'Turkey',
4354 'TM': 'Turkmenistan',
4355 'TC': 'Turks and Caicos Islands',
4356 'TV': 'Tuvalu',
4357 'UG': 'Uganda',
4358 'UA': 'Ukraine',
4359 'AE': 'United Arab Emirates',
4360 'GB': 'United Kingdom',
4361 'US': 'United States',
4362 'UM': 'United States Minor Outlying Islands',
4363 'UY': 'Uruguay',
4364 'UZ': 'Uzbekistan',
4365 'VU': 'Vanuatu',
4366 'VE': 'Venezuela, Bolivarian Republic of',
4367 'VN': 'Viet Nam',
4368 'VG': 'Virgin Islands, British',
4369 'VI': 'Virgin Islands, U.S.',
4370 'WF': 'Wallis and Futuna',
4371 'EH': 'Western Sahara',
4372 'YE': 'Yemen',
4373 'ZM': 'Zambia',
4374 'ZW': 'Zimbabwe',
4375 # Not ISO 3166 codes, but used for IP blocks
4376 'AP': 'Asia/Pacific Region',
4377 'EU': 'Europe',
4378 }
4379
4380 @classmethod
4381 def short2full(cls, code):
4382 """Convert an ISO 3166-2 country code to the corresponding full name"""
4383 return cls._country_map.get(code.upper())
4384
4385
4386 class GeoUtils:
4387 # Major IPv4 address blocks per country
4388 _country_ip_map = {
4389 'AD': '46.172.224.0/19',
4390 'AE': '94.200.0.0/13',
4391 'AF': '149.54.0.0/17',
4392 'AG': '209.59.64.0/18',
4393 'AI': '204.14.248.0/21',
4394 'AL': '46.99.0.0/16',
4395 'AM': '46.70.0.0/15',
4396 'AO': '105.168.0.0/13',
4397 'AP': '182.50.184.0/21',
4398 'AQ': '23.154.160.0/24',
4399 'AR': '181.0.0.0/12',
4400 'AS': '202.70.112.0/20',
4401 'AT': '77.116.0.0/14',
4402 'AU': '1.128.0.0/11',
4403 'AW': '181.41.0.0/18',
4404 'AX': '185.217.4.0/22',
4405 'AZ': '5.197.0.0/16',
4406 'BA': '31.176.128.0/17',
4407 'BB': '65.48.128.0/17',
4408 'BD': '114.130.0.0/16',
4409 'BE': '57.0.0.0/8',
4410 'BF': '102.178.0.0/15',
4411 'BG': '95.42.0.0/15',
4412 'BH': '37.131.0.0/17',
4413 'BI': '154.117.192.0/18',
4414 'BJ': '137.255.0.0/16',
4415 'BL': '185.212.72.0/23',
4416 'BM': '196.12.64.0/18',
4417 'BN': '156.31.0.0/16',
4418 'BO': '161.56.0.0/16',
4419 'BQ': '161.0.80.0/20',
4420 'BR': '191.128.0.0/12',
4421 'BS': '24.51.64.0/18',
4422 'BT': '119.2.96.0/19',
4423 'BW': '168.167.0.0/16',
4424 'BY': '178.120.0.0/13',
4425 'BZ': '179.42.192.0/18',
4426 'CA': '99.224.0.0/11',
4427 'CD': '41.243.0.0/16',
4428 'CF': '197.242.176.0/21',
4429 'CG': '160.113.0.0/16',
4430 'CH': '85.0.0.0/13',
4431 'CI': '102.136.0.0/14',
4432 'CK': '202.65.32.0/19',
4433 'CL': '152.172.0.0/14',
4434 'CM': '102.244.0.0/14',
4435 'CN': '36.128.0.0/10',
4436 'CO': '181.240.0.0/12',
4437 'CR': '201.192.0.0/12',
4438 'CU': '152.206.0.0/15',
4439 'CV': '165.90.96.0/19',
4440 'CW': '190.88.128.0/17',
4441 'CY': '31.153.0.0/16',
4442 'CZ': '88.100.0.0/14',
4443 'DE': '53.0.0.0/8',
4444 'DJ': '197.241.0.0/17',
4445 'DK': '87.48.0.0/12',
4446 'DM': '192.243.48.0/20',
4447 'DO': '152.166.0.0/15',
4448 'DZ': '41.96.0.0/12',
4449 'EC': '186.68.0.0/15',
4450 'EE': '90.190.0.0/15',
4451 'EG': '156.160.0.0/11',
4452 'ER': '196.200.96.0/20',
4453 'ES': '88.0.0.0/11',
4454 'ET': '196.188.0.0/14',
4455 'EU': '2.16.0.0/13',
4456 'FI': '91.152.0.0/13',
4457 'FJ': '144.120.0.0/16',
4458 'FK': '80.73.208.0/21',
4459 'FM': '119.252.112.0/20',
4460 'FO': '88.85.32.0/19',
4461 'FR': '90.0.0.0/9',
4462 'GA': '41.158.0.0/15',
4463 'GB': '25.0.0.0/8',
4464 'GD': '74.122.88.0/21',
4465 'GE': '31.146.0.0/16',
4466 'GF': '161.22.64.0/18',
4467 'GG': '62.68.160.0/19',
4468 'GH': '154.160.0.0/12',
4469 'GI': '95.164.0.0/16',
4470 'GL': '88.83.0.0/19',
4471 'GM': '160.182.0.0/15',
4472 'GN': '197.149.192.0/18',
4473 'GP': '104.250.0.0/19',
4474 'GQ': '105.235.224.0/20',
4475 'GR': '94.64.0.0/13',
4476 'GT': '168.234.0.0/16',
4477 'GU': '168.123.0.0/16',
4478 'GW': '197.214.80.0/20',
4479 'GY': '181.41.64.0/18',
4480 'HK': '113.252.0.0/14',
4481 'HN': '181.210.0.0/16',
4482 'HR': '93.136.0.0/13',
4483 'HT': '148.102.128.0/17',
4484 'HU': '84.0.0.0/14',
4485 'ID': '39.192.0.0/10',
4486 'IE': '87.32.0.0/12',
4487 'IL': '79.176.0.0/13',
4488 'IM': '5.62.80.0/20',
4489 'IN': '117.192.0.0/10',
4490 'IO': '203.83.48.0/21',
4491 'IQ': '37.236.0.0/14',
4492 'IR': '2.176.0.0/12',
4493 'IS': '82.221.0.0/16',
4494 'IT': '79.0.0.0/10',
4495 'JE': '87.244.64.0/18',
4496 'JM': '72.27.0.0/17',
4497 'JO': '176.29.0.0/16',
4498 'JP': '133.0.0.0/8',
4499 'KE': '105.48.0.0/12',
4500 'KG': '158.181.128.0/17',
4501 'KH': '36.37.128.0/17',
4502 'KI': '103.25.140.0/22',
4503 'KM': '197.255.224.0/20',
4504 'KN': '198.167.192.0/19',
4505 'KP': '175.45.176.0/22',
4506 'KR': '175.192.0.0/10',
4507 'KW': '37.36.0.0/14',
4508 'KY': '64.96.0.0/15',
4509 'KZ': '2.72.0.0/13',
4510 'LA': '115.84.64.0/18',
4511 'LB': '178.135.0.0/16',
4512 'LC': '24.92.144.0/20',
4513 'LI': '82.117.0.0/19',
4514 'LK': '112.134.0.0/15',
4515 'LR': '102.183.0.0/16',
4516 'LS': '129.232.0.0/17',
4517 'LT': '78.56.0.0/13',
4518 'LU': '188.42.0.0/16',
4519 'LV': '46.109.0.0/16',
4520 'LY': '41.252.0.0/14',
4521 'MA': '105.128.0.0/11',
4522 'MC': '88.209.64.0/18',
4523 'MD': '37.246.0.0/16',
4524 'ME': '178.175.0.0/17',
4525 'MF': '74.112.232.0/21',
4526 'MG': '154.126.0.0/17',
4527 'MH': '117.103.88.0/21',
4528 'MK': '77.28.0.0/15',
4529 'ML': '154.118.128.0/18',
4530 'MM': '37.111.0.0/17',
4531 'MN': '49.0.128.0/17',
4532 'MO': '60.246.0.0/16',
4533 'MP': '202.88.64.0/20',
4534 'MQ': '109.203.224.0/19',
4535 'MR': '41.188.64.0/18',
4536 'MS': '208.90.112.0/22',
4537 'MT': '46.11.0.0/16',
4538 'MU': '105.16.0.0/12',
4539 'MV': '27.114.128.0/18',
4540 'MW': '102.70.0.0/15',
4541 'MX': '187.192.0.0/11',
4542 'MY': '175.136.0.0/13',
4543 'MZ': '197.218.0.0/15',
4544 'NA': '41.182.0.0/16',
4545 'NC': '101.101.0.0/18',
4546 'NE': '197.214.0.0/18',
4547 'NF': '203.17.240.0/22',
4548 'NG': '105.112.0.0/12',
4549 'NI': '186.76.0.0/15',
4550 'NL': '145.96.0.0/11',
4551 'NO': '84.208.0.0/13',
4552 'NP': '36.252.0.0/15',
4553 'NR': '203.98.224.0/19',
4554 'NU': '49.156.48.0/22',
4555 'NZ': '49.224.0.0/14',
4556 'OM': '5.36.0.0/15',
4557 'PA': '186.72.0.0/15',
4558 'PE': '186.160.0.0/14',
4559 'PF': '123.50.64.0/18',
4560 'PG': '124.240.192.0/19',
4561 'PH': '49.144.0.0/13',
4562 'PK': '39.32.0.0/11',
4563 'PL': '83.0.0.0/11',
4564 'PM': '70.36.0.0/20',
4565 'PR': '66.50.0.0/16',
4566 'PS': '188.161.0.0/16',
4567 'PT': '85.240.0.0/13',
4568 'PW': '202.124.224.0/20',
4569 'PY': '181.120.0.0/14',
4570 'QA': '37.210.0.0/15',
4571 'RE': '102.35.0.0/16',
4572 'RO': '79.112.0.0/13',
4573 'RS': '93.86.0.0/15',
4574 'RU': '5.136.0.0/13',
4575 'RW': '41.186.0.0/16',
4576 'SA': '188.48.0.0/13',
4577 'SB': '202.1.160.0/19',
4578 'SC': '154.192.0.0/11',
4579 'SD': '102.120.0.0/13',
4580 'SE': '78.64.0.0/12',
4581 'SG': '8.128.0.0/10',
4582 'SI': '188.196.0.0/14',
4583 'SK': '78.98.0.0/15',
4584 'SL': '102.143.0.0/17',
4585 'SM': '89.186.32.0/19',
4586 'SN': '41.82.0.0/15',
4587 'SO': '154.115.192.0/18',
4588 'SR': '186.179.128.0/17',
4589 'SS': '105.235.208.0/21',
4590 'ST': '197.159.160.0/19',
4591 'SV': '168.243.0.0/16',
4592 'SX': '190.102.0.0/20',
4593 'SY': '5.0.0.0/16',
4594 'SZ': '41.84.224.0/19',
4595 'TC': '65.255.48.0/20',
4596 'TD': '154.68.128.0/19',
4597 'TG': '196.168.0.0/14',
4598 'TH': '171.96.0.0/13',
4599 'TJ': '85.9.128.0/18',
4600 'TK': '27.96.24.0/21',
4601 'TL': '180.189.160.0/20',
4602 'TM': '95.85.96.0/19',
4603 'TN': '197.0.0.0/11',
4604 'TO': '175.176.144.0/21',
4605 'TR': '78.160.0.0/11',
4606 'TT': '186.44.0.0/15',
4607 'TV': '202.2.96.0/19',
4608 'TW': '120.96.0.0/11',
4609 'TZ': '156.156.0.0/14',
4610 'UA': '37.52.0.0/14',
4611 'UG': '102.80.0.0/13',
4612 'US': '6.0.0.0/8',
4613 'UY': '167.56.0.0/13',
4614 'UZ': '84.54.64.0/18',
4615 'VA': '212.77.0.0/19',
4616 'VC': '207.191.240.0/21',
4617 'VE': '186.88.0.0/13',
4618 'VG': '66.81.192.0/20',
4619 'VI': '146.226.0.0/16',
4620 'VN': '14.160.0.0/11',
4621 'VU': '202.80.32.0/20',
4622 'WF': '117.20.32.0/21',
4623 'WS': '202.4.32.0/19',
4624 'YE': '134.35.0.0/16',
4625 'YT': '41.242.116.0/22',
4626 'ZA': '41.0.0.0/11',
4627 'ZM': '102.144.0.0/13',
4628 'ZW': '102.177.192.0/18',
4629 }
4630
4631 @classmethod
4632 def random_ipv4(cls, code_or_block):
4633 if len(code_or_block) == 2:
4634 block = cls._country_ip_map.get(code_or_block.upper())
4635 if not block:
4636 return None
4637 else:
4638 block = code_or_block
4639 addr, preflen = block.split('/')
4640 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4641 addr_max = addr_min | (0xffffffff >> int(preflen))
4642 return str(socket.inet_ntoa(
4643 struct.pack('!L', random.randint(addr_min, addr_max))))
4644
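# Editor's illustration (sketch; outputs are random, the shown values are made up):
# >>> GeoUtils.random_ipv4('DE')      # an address inside 53.0.0.0/8
# '53.187.4.22'
# >>> GeoUtils.random_ipv4('192.0.2.0/24')
# '192.0.2.157'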
4645
4646 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4647 def __init__(self, proxies=None):
4648 # Set default handlers
4649 for type in ('http', 'https'):
4650 setattr(self, '%s_open' % type,
4651 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4652 meth(r, proxy, type))
4653 urllib.request.ProxyHandler.__init__(self, proxies)
4654
4655 def proxy_open(self, req, proxy, type):
4656 req_proxy = req.headers.get('Ytdl-request-proxy')
4657 if req_proxy is not None:
4658 proxy = req_proxy
4659 del req.headers['Ytdl-request-proxy']
4660
4661 if proxy == '__noproxy__':
4662 return None # No Proxy
4663 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4664 req.add_header('Ytdl-socks-proxy', proxy)
4665 # yt-dlp's http/https handlers wrap the socket with SOCKS themselves
4666 return None
4667 return urllib.request.ProxyHandler.proxy_open(
4668 self, req, proxy, type)
4669
4670
4671 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4672 # released into Public Domain
4673 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4674
4675 def long_to_bytes(n, blocksize=0):
4676 """long_to_bytes(n:long, blocksize:int) : string
4677 Convert a long integer to a byte string.
4678
4679 If optional blocksize is given and greater than zero, pad the front of the
4680 byte string with binary zeros so that the length is a multiple of
4681 blocksize.
4682 """
4683 # after much testing, this algorithm was deemed to be the fastest
4684 s = b''
4685 n = int(n)
4686 while n > 0:
4687 s = struct.pack('>I', n & 0xffffffff) + s
4688 n = n >> 32
4689 # strip off leading zeros
4690 for i in range(len(s)):
4691 if s[i] != b'\000'[0]:
4692 break
4693 else:
4694 # only happens when n == 0
4695 s = b'\000'
4696 i = 0
4697 s = s[i:]
4698 # add back some pad bytes. this could be done more efficiently w.r.t. the
4699 # de-padding being done above, but sigh...
4700 if blocksize > 0 and len(s) % blocksize:
4701 s = (blocksize - len(s) % blocksize) * b'\000' + s
4702 return s
4703
4704
4705 def bytes_to_long(s):
4706 """bytes_to_long(string) : long
4707 Convert a byte string to a long integer.
4708
4709 This is (essentially) the inverse of long_to_bytes().
4710 """
4711 acc = 0
4712 length = len(s)
4713 if length % 4:
4714 extra = (4 - length % 4)
4715 s = b'\000' * extra + s
4716 length = length + extra
4717 for i in range(0, length, 4):
4718 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4719 return acc
4720
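# Editor's illustration (sketch): the two functions are inverses
# >>> bytes_to_long(b'\x01\x00')
# 256
# >>> long_to_bytes(256)
# b'\x01\x00'
# >>> long_to_bytes(1, blocksize=4)
# b'\x00\x00\x00\x01'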
4721
4722 def ohdave_rsa_encrypt(data, exponent, modulus):
4723 '''
4724 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4725
4726 Input:
4727 data: data to encrypt, bytes-like object
4728 exponent, modulus: parameter e and N of RSA algorithm, both integer
4729 Output: hex string of encrypted data
4730
4731 Limitation: supports one block encryption only
4732 '''
4733
4734 payload = int(binascii.hexlify(data[::-1]), 16)
4735 encrypted = pow(payload, exponent, modulus)
4736 return '%x' % encrypted
4737
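# Editor's illustration (sketch) with toy parameters e=7, N=187:
# >>> ohdave_rsa_encrypt(b'\x02', 7, 187)  # pow(2, 7, 187) == 128
# '80'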
4738
4739 def pkcs1pad(data, length):
4740 """
4741 Padding input data with PKCS#1 scheme
4742
4743 @param {int[]} data input data
4744 @param {int} length target length
4745 @returns {int[]} padded data
4746 """
4747 if len(data) > length - 11:
4748 raise ValueError('Input data too long for PKCS#1 padding')
4749
4750 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 padding bytes must be nonzero
4751 return [0, 2] + pseudo_random + [0] + data
4752
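# Editor's illustration (sketch): a 16-byte block leaves 10 padding bytes for 3 bytes of data
# >>> block = pkcs1pad([1, 2, 3], 16)
# >>> len(block), block[:2], block[12:]
# (16, [0, 2], [0, 1, 2, 3])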
4753
4754 def _base_n_table(n, table):
4755 if not table and not n:
4756 raise ValueError('Either table or n must be specified')
4757 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4758
4759 if n and n != len(table):
4760 raise ValueError(f'base {n} exceeds table length {len(table)}')
4761 return table
4762
4763
4764 def encode_base_n(num, n=None, table=None):
4765 """Convert given int to a base-n string"""
4766 table = _base_n_table(n, table)
4767 if not num:
4768 return table[0]
4769
4770 result, base = '', len(table)
4771 while num:
4772 result = table[num % base] + result
4773 num = num // base
4774 return result
4775
4776
4777 def decode_base_n(string, n=None, table=None):
4778 """Convert given base-n string to int"""
4779 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4780 result, base = 0, len(table)
4781 for char in string:
4782 result = result * base + table[char]
4783 return result
4784
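# Editor's illustration (sketch):
# >>> encode_base_n(35, n=36)
# 'z'
# >>> decode_base_n('ff', n=16)
# 255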
4785
4786 def decode_base(value, digits):
4787 write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
4788 'and may be removed in a future version. Use yt_dlp.utils.decode_base_n instead')
4789 return decode_base_n(value, table=digits)
4790
4791
4792 def decode_packed_codes(code):
4793 mobj = re.search(PACKED_CODES_RE, code)
4794 obfuscated_code, base, count, symbols = mobj.groups()
4795 base = int(base)
4796 count = int(count)
4797 symbols = symbols.split('|')
4798 symbol_table = {}
4799
4800 while count:
4801 count -= 1
4802 base_n_count = encode_base_n(count, base)
4803 symbol_table[base_n_count] = symbols[count] or base_n_count
4804
4805 return re.sub(
4806 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4807 obfuscated_code)
4808
4809
4810 def caesar(s, alphabet, shift):
4811 if shift == 0:
4812 return s
4813 l = len(alphabet)
4814 return ''.join(
4815 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4816 for c in s)
4817
4818
4819 def rot47(s):
4820 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4821
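# Editor's illustration (sketch): rot47 is its own inverse over printable ASCII
# >>> caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 1)
# 'bcd'
# >>> rot47(rot47('secret'))
# 'secret'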
4822
4823 def parse_m3u8_attributes(attrib):
4824 info = {}
4825 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4826 if val.startswith('"'):
4827 val = val[1:-1]
4828 info[key] = val
4829 return info
4830
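# Editor's illustration (sketch): quoted values keep embedded commas
# >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
# {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}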
4831
4832 def urshift(val, n):
4833 return val >> n if val >= 0 else (val + 0x100000000) >> n
4834
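# Editor's illustration (sketch): an unsigned right shift for 32-bit values
# >>> urshift(-1, 8)   # (-1 + 0x100000000) >> 8
# 16777215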
4835
4836 # Based on png2str() written by @gdkchan and improved by @yokrysty
4837 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4838 def decode_png(png_data):
4839 # Reference: https://www.w3.org/TR/PNG/
4840 header = png_data[8:]
4841
4842 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4843 raise OSError('Not a valid PNG file.')
4844
4845 int_map = {1: '>B', 2: '>H', 4: '>I'}
4846 unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
4847
4848 chunks = []
4849
4850 while header:
4851 length = unpack_integer(header[:4])
4852 header = header[4:]
4853
4854 chunk_type = header[:4]
4855 header = header[4:]
4856
4857 chunk_data = header[:length]
4858 header = header[length:]
4859
4860 header = header[4:] # Skip CRC
4861
4862 chunks.append({
4863 'type': chunk_type,
4864 'length': length,
4865 'data': chunk_data
4866 })
4867
4868 ihdr = chunks[0]['data']
4869
4870 width = unpack_integer(ihdr[:4])
4871 height = unpack_integer(ihdr[4:8])
4872
4873 idat = b''
4874
4875 for chunk in chunks:
4876 if chunk['type'] == b'IDAT':
4877 idat += chunk['data']
4878
4879 if not idat:
4880 raise OSError('Unable to read PNG data.')
4881
4882 decompressed_data = bytearray(zlib.decompress(idat))
4883
4884 stride = width * 3
4885 pixels = []
4886
4887 def _get_pixel(idx):
4888 x = idx % stride
4889 y = idx // stride
4890 return pixels[y][x]
4891
4892 for y in range(height):
4893 base_pos = y * (1 + stride)
4894 filter_type = decompressed_data[base_pos]
4895
4896 current_row = []
4897
4898 pixels.append(current_row)
4899
4900 for x in range(stride):
4901 color = decompressed_data[1 + base_pos + x]
4902 basex = y * stride + x
4903 left = 0
4904 up = 0
4905
4906 if x > 2:
4907 left = _get_pixel(basex - 3)
4908 if y > 0:
4909 up = _get_pixel(basex - stride)
4910
4911 if filter_type == 1: # Sub
4912 color = (color + left) & 0xff
4913 elif filter_type == 2: # Up
4914 color = (color + up) & 0xff
4915 elif filter_type == 3: # Average
4916 color = (color + ((left + up) >> 1)) & 0xff
4917 elif filter_type == 4: # Paeth
4918 a = left
4919 b = up
4920 c = 0
4921
4922 if x > 2 and y > 0:
4923 c = _get_pixel(basex - stride - 3)
4924
4925 p = a + b - c
4926
4927 pa = abs(p - a)
4928 pb = abs(p - b)
4929 pc = abs(p - c)
4930
4931 if pa <= pb and pa <= pc:
4932 color = (color + a) & 0xff
4933 elif pb <= pc:
4934 color = (color + b) & 0xff
4935 else:
4936 color = (color + c) & 0xff
4937
4938 current_row.append(color)
4939
4940 return width, height, pixels
4941
4942
4943 def write_xattr(path, key, value):
4944 # Windows: Write xattrs to NTFS Alternate Data Streams:
4945 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4946 if compat_os_name == 'nt':
4947 assert ':' not in key
4948 assert os.path.exists(path)
4949
4950 try:
4951 with open(f'{path}:{key}', 'wb') as f:
4952 f.write(value)
4953 except OSError as e:
4954 raise XAttrMetadataError(e.errno, e.strerror)
4955 return
4956
4957 # UNIX Method 1. Use xattrs/pyxattrs modules
4958
4959 setxattr = None
4960 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
4961 # Unicode arguments are not supported in pyxattr until version 0.5.0
4962 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4963 if version_tuple(xattr.__version__) >= (0, 5, 0):
4964 setxattr = xattr.set
4965 elif xattr:
4966 setxattr = xattr.setxattr
4967
4968 if setxattr:
4969 try:
4970 setxattr(path, key, value)
4971 except OSError as e:
4972 raise XAttrMetadataError(e.errno, e.strerror)
4973 return
4974
4975 # UNIX Method 2. Use setfattr/xattr executables
4976 exe = ('setfattr' if check_executable('setfattr', ['--version'])
4977 else 'xattr' if check_executable('xattr', ['-h']) else None)
4978 if not exe:
4979 raise XAttrUnavailableError(
4980 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
4981 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4982
4983 value = value.decode()
4984 try:
4985 _, stderr, returncode = Popen.run(
4986 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
4987 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4988 except OSError as e:
4989 raise XAttrMetadataError(e.errno, e.strerror)
4990 if returncode:
4991 raise XAttrMetadataError(returncode, stderr)
4992
4993
4994 def random_birthday(year_field, month_field, day_field):
4995 start_date = datetime.date(1950, 1, 1)
4996 end_date = datetime.date(1995, 12, 31)
4997 offset = random.randint(0, (end_date - start_date).days)
4998 random_date = start_date + datetime.timedelta(offset)
4999 return {
5000 year_field: str(random_date.year),
5001 month_field: str(random_date.month),
5002 day_field: str(random_date.day),
5003 }
5004
5005
5006 # Templates for internet shortcut files, which are plain text files.
5007 DOT_URL_LINK_TEMPLATE = '''\
5008 [InternetShortcut]
5009 URL=%(url)s
5010 '''
5011
5012 DOT_WEBLOC_LINK_TEMPLATE = '''\
5013 <?xml version="1.0" encoding="UTF-8"?>
5014 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5015 <plist version="1.0">
5016 <dict>
5017 \t<key>URL</key>
5018 \t<string>%(url)s</string>
5019 </dict>
5020 </plist>
5021 '''
5022
5023 DOT_DESKTOP_LINK_TEMPLATE = '''\
5024 [Desktop Entry]
5025 Encoding=UTF-8
5026 Name=%(filename)s
5027 Type=Link
5028 URL=%(url)s
5029 Icon=text-html
5030 '''
5031
5032 LINK_TEMPLATES = {
5033 'url': DOT_URL_LINK_TEMPLATE,
5034 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5035 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5036 }
5037
5038
5039 def iri_to_uri(iri):
5040 """
5041 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5042
5043 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5044 """
5045
5046 iri_parts = urllib.parse.urlparse(iri)
5047
5048 if '[' in iri_parts.netloc:
5049 raise ValueError('IPv6 URIs are not yet supported.')
5050 # Querying `.netloc` when there is only one bracket also raises a ValueError.
5051
5052 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5053
5054 net_location = ''
5055 if iri_parts.username:
5056 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5057 if iri_parts.password is not None:
5058 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5059 net_location += '@'
5060
5061 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5062 # The 'idna' encoding produces ASCII text.
5063 if iri_parts.port is not None and iri_parts.port != 80:
5064 net_location += ':' + str(iri_parts.port)
5065
5066 return urllib.parse.urlunparse(
5067 (iri_parts.scheme,
5068 net_location,
5069
5070 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5071
5072 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5073 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5074
5075 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5076 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5077
5078 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5079
5080 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5081
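# Editor's illustration (sketch): the hostname is punycoded, the rest is percent-encoded
# >>> iri_to_uri('http://ドメイン.example/検索?q=値')
# 'http://xn--eckwd4c7c.example/%E6%A4%9C%E7%B4%A2?q=%E5%80%A4'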
5082
5083 def to_high_limit_path(path):
5084 if sys.platform in ['win32', 'cygwin']:
5085 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5086 return '\\\\?\\' + os.path.abspath(path)
5087
5088 return path
5089
5090
5091 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5092 val = traverse_obj(obj, *variadic(field))
5093 if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5094 return default
5095 return template % func(val)
5096
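# Editor's illustration (sketch):
# >>> format_field({'height': 1080}, 'height', '%sp')
# '1080p'
# >>> format_field({'height': None}, 'height', '%sp', default='unknown')
# 'unknown'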
5097
5098 def clean_podcast_url(url):
5099 return re.sub(r'''(?x)
5100 (?:
5101 (?:
5102 chtbl\.com/track|
5103 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5104 play\.podtrac\.com
5105 )/[^/]+|
5106 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5107 flex\.acast\.com|
5108 pd(?:
5109 cn\.co| # https://podcorn.com/analytics-prefix/
5110 st\.fm # https://podsights.com/docs/
5111 )/e
5112 )/''', '', url)
5113
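# Editor's illustration (sketch, with a made-up episode URL):
# >>> clean_podcast_url('https://chtbl.com/track/ABC123/traffic.megaphone.fm/episode.mp3')
# 'https://traffic.megaphone.fm/episode.mp3'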
5114
5115 _HEX_TABLE = '0123456789abcdef'
5116
5117
5118 def random_uuidv4():
5119 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5120
5121
5122 def make_dir(path, to_screen=None):
5123 try:
5124 dn = os.path.dirname(path)
5125 if dn and not os.path.exists(dn):
5126 os.makedirs(dn)
5127 return True
5128 except OSError as err:
5129 if callable(to_screen):
5130 to_screen('unable to create directory ' + error_to_compat_str(err))
5131 return False
5132
5133
5134 def get_executable_path():
5135 from .update import _get_variant_and_executable_path
5136
5137 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5138
5139
5140 def load_plugins(name, suffix, namespace):
5141 classes = {}
5142 with contextlib.suppress(FileNotFoundError):
5143 plugins_spec = importlib.util.spec_from_file_location(
5144 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5145 plugins = importlib.util.module_from_spec(plugins_spec)
5146 sys.modules[plugins_spec.name] = plugins
5147 plugins_spec.loader.exec_module(plugins)
5148 for name in dir(plugins):
5149 if name in namespace:
5150 continue
5151 if not name.endswith(suffix):
5152 continue
5153 klass = getattr(plugins, name)
5154 classes[name] = namespace[name] = klass
5155 return classes
5156
5157
5158 def traverse_obj(
5159 obj, *path_list, default=None, expected_type=None, get_all=True,
5160 casesense=True, is_user_input=False, traverse_string=False):
5161 ''' Traverse nested list/dict/tuple
5162 @param path_list A list of paths which are checked one by one.
5163 Each path is a list of keys where each key is a:
5164 - None: Do nothing
5165 - string: A dictionary key
5166 - int: An index into a list
5167 - tuple: A list of keys all of which will be traversed
5168 - Ellipsis: Fetch all values in the object
5169 - Function: Takes the key and value as arguments
5170 and returns whether the key matches or not
5171 @param default Default value to return
5172 @param expected_type Only accept final value of this type (Can also be any callable)
5173 @param get_all Return all the values obtained from a path or only the first one
5174 @param casesense Whether to consider dictionary keys as case sensitive
5175 @param is_user_input Whether the keys are generated from user input. If True,
5176 strings are converted to int/slice if necessary
5177 @param traverse_string Whether to traverse inside strings. If True, any
5178 non-compatible object will also be converted into a string
5179 # TODO: Write tests
5180 '''
5181 if not casesense:
5182 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
5183 path_list = (map(_lower, variadic(path)) for path in path_list)
5184
5185 def _traverse_obj(obj, path, _current_depth=0):
5186 nonlocal depth
5187 path = tuple(variadic(path))
5188 for i, key in enumerate(path):
5189 if None in (key, obj):
5190 return obj
5191 if isinstance(key, (list, tuple)):
5192 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5193 key = ...
5194 if key is ...:
5195 obj = (obj.values() if isinstance(obj, dict)
5196 else obj if isinstance(obj, (list, tuple, LazyList))
5197 else str(obj) if traverse_string else [])
5198 _current_depth += 1
5199 depth = max(depth, _current_depth)
5200 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5201 elif callable(key):
5202 if isinstance(obj, (list, tuple, LazyList)):
5203 obj = enumerate(obj)
5204 elif isinstance(obj, dict):
5205 obj = obj.items()
5206 else:
5207 if not traverse_string:
5208 return None
5209 obj = str(obj)
5210 _current_depth += 1
5211 depth = max(depth, _current_depth)
5212 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5213 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5214 obj = (obj.get(key) if casesense or (key in obj)
5215 else next((v for k, v in obj.items() if _lower(k) == key), None))
5216 else:
5217 if is_user_input:
5218 key = (int_or_none(key) if ':' not in key
5219 else slice(*map(int_or_none, key.split(':'))))
5220 if key == slice(None):
5221 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5222 if not isinstance(key, (int, slice)):
5223 return None
5224 if not isinstance(obj, (list, tuple, LazyList)):
5225 if not traverse_string:
5226 return None
5227 obj = str(obj)
5228 try:
5229 obj = obj[key]
5230 except IndexError:
5231 return None
5232 return obj
5233
5234 if isinstance(expected_type, type):
5235 type_test = lambda val: val if isinstance(val, expected_type) else None
5236 else:
5237 type_test = expected_type or IDENTITY
5238
5239 for path in path_list:
5240 depth = 0
5241 val = _traverse_obj(obj, path)
5242 if val is not None:
5243 if depth:
5244 for _ in range(depth - 1):
5245 val = itertools.chain.from_iterable(v for v in val if v is not None)
5246 val = [v for v in map(type_test, val) if v is not None]
5247 if val:
5248 return val if get_all else val[0]
5249 else:
5250 val = type_test(val)
5251 if val is not None:
5252 return val
5253 return default
5254
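# Editor's illustration (sketch): `...` fans out over list items/dict values,
# and later paths act as fallbacks
# >>> traverse_obj({'formats': [{'url': 'u1'}, {}, {'url': 'u2'}]}, ('formats', ..., 'url'))
# ['u1', 'u2']
# >>> traverse_obj({'a': {'b': None}}, ('a', 'b'), ('a', 'c'), default='missing')
# 'missing'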
5255
5256 def traverse_dict(dictn, keys, casesense=True):
5257 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5258 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5259 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5260
5261
5262 def get_first(obj, keys, **kwargs):
5263 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5264
5265
5266 def variadic(x, allowed_types=(str, bytes, dict)):
5267 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5268
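# Editor's illustration (sketch): scalars (and str/bytes/dict) are wrapped into a tuple
# >>> variadic('spam')
# ('spam',)
# >>> variadic(['spam', 'eggs'])
# ['spam', 'eggs']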
5269
5270 def time_seconds(**kwargs):
5271 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5272 return t.timestamp()
5273
5274
5275 # create a JSON Web Signature (jws) with HS256 algorithm
5276 # the resulting format is in JWS Compact Serialization
5277 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5278 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5279 def jwt_encode_hs256(payload_data, key, headers={}):
5280 header_data = {
5281 'alg': 'HS256',
5282 'typ': 'JWT',
5283 }
5284 if headers:
5285 header_data.update(headers)
5286 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5287 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5288 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5289 signature_b64 = base64.b64encode(h.digest())
5290 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5291 return token
5292
5293
5294 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5295 def jwt_decode_hs256(jwt):
5296 header_b64, payload_b64, signature_b64 = jwt.split('.')
5297 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64 + '=' * (-len(payload_b64) % 4)))  # re-add the base64 padding that JWS Compact Serialization strips
5298 return payload_data
5299
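# Editor's illustration (sketch, with a made-up key): round-trip of the two helpers above
# >>> token = jwt_encode_hs256({'uid': 1}, 'secret-key')
# >>> jwt_decode_hs256(token.decode())
# {'uid': 1}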
5300
5301 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5302
5303
5304 @functools.cache
5305 def supports_terminal_sequences(stream):
5306 if compat_os_name == 'nt':
5307 if not WINDOWS_VT_MODE:
5308 return False
5309 elif not os.getenv('TERM'):
5310 return False
5311 try:
5312 return stream.isatty()
5313 except BaseException:
5314 return False
5315
5316
5317 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5318 if get_windows_version() < (10, 0, 10586):
5319 return
5320 global WINDOWS_VT_MODE
5321 try:
5322 Popen.run('', shell=True)
5323 except Exception:
5324 return
5325
5326 WINDOWS_VT_MODE = True
5327 supports_terminal_sequences.cache_clear()
5328
5329
5330 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5331
5332
5333 def remove_terminal_sequences(string):
5334 return _terminal_sequences_re.sub('', string)
5335
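# Editor's illustration (sketch):
# >>> remove_terminal_sequences('\033[0;31mred\033[0m')
# 'red'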
5336
5337 def number_of_digits(number):
5338 return len('%d' % number)
5339
5340
5341 def join_nonempty(*values, delim='-', from_dict=None):
5342 if from_dict is not None:
5343 values = (traverse_obj(from_dict, variadic(v)) for v in values)
5344 return delim.join(map(str, filter(None, values)))
5345
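# Editor's illustration (sketch): falsy values are dropped before joining
# >>> join_nonempty('1080p', None, '', 'webm')
# '1080p-webm'
# >>> join_nonempty('a', 'b', delim='/')
# 'a/b'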
5346
5347 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5348 """
5349 Find the largest format dimensions in terms of video width and, for each thumbnail:
5350 * Modify the URL: Match the width with the provided regex and replace with the former width
5351 * Update dimensions
5352
5353 This function is useful with video services that scale the provided thumbnails on demand
5354 """
5355 _keys = ('width', 'height')
5356 max_dimensions = max(
5357 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5358 default=(0, 0))
5359 if not max_dimensions[0]:
5360 return thumbnails
5361 return [
5362 merge_dicts(
5363 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5364 dict(zip(_keys, max_dimensions)), thumbnail)
5365 for thumbnail in thumbnails
5366 ]
5367
5368
5369 def parse_http_range(range):
5370 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5371 if not range:
5372 return None, None, None
5373 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5374 if not crg:
5375 return None, None, None
5376 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5377
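# Editor's illustration (sketch):
# >>> parse_http_range('bytes=500-999')
# (500, 999, None)
# >>> parse_http_range('bytes 500-999/1234')
# (500, 999, 1234)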
5378
5379 def read_stdin(what):
5380 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5381 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5382 return sys.stdin
5383
5384
class Config:
    """Parses an argument list, recursively loading any config files referenced via --config-locations"""
    own_args = None
    parsed_args = None
    filename = None
    __initialized = False

    def __init__(self, parser, label=None):
        self.parser, self.label = parser, label
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        assert not self.__initialized
        self.own_args, self.filename = args, filename
        return self.load_configs()

    def load_configs(self):
        directory = ''
        if self.filename:
            location = os.path.realpath(self.filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args
        for location in opts.config_locations or []:
            if location == '-':
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=[]):
        try:
            optionf = open(filename)
        except OSError:
            return default  # silently skip if file is not present
        try:
            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
            contents = optionf.read()
            res = shlex.split(contents, comments=True)
        except Exception as err:
            raise ValueError(f'Unable to parse "{filename}": {err}')
        finally:
            optionf.close()
        return res

    @staticmethod
    def hide_login_info(opts):
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            else:
                return o

        opts = list(map(_scrub_eq, opts))
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts
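    # e.g. hide_login_info(['-u', 'me', '--password=hunter2'])
    #   ->  ['-u', 'PRIVATE', '--password=PRIVATE']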
    def append_config(self, *args, label=None):
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        return self.parser.parse_args(self.all_args)
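# Usage sketch (hypothetical; yt-dlp passes its own option parser, which
# defines the `config_locations` option this class relies on):
#   config = Config(parser, label='Main')
#   config.init(sys.argv[1:])                         # also loads any --config-locations targets
#   config.append_config(['-f', 'best'], label='Defaults')
#   opts, args = config.parse_known_args()            # parses the merged argument list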


class WebSocketsWrapper:
    """Wraps websockets module to use in non-async scopes"""
    pool = None

    def __init__(self, url, headers=None, connect=True):
        self.loop = asyncio.new_event_loop()
        # XXX: the "loop" argument is deprecated
        self.conn = websockets.connect(
            url, extra_headers=headers, ping_interval=None,
            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
        if connect:
            self.__enter__()
        atexit.register(self.__exit__, None, None, None)

    def __enter__(self):
        if not self.pool:
            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
        return self

    def send(self, *args):
        self.run_with_loop(self.pool.send(*args), self.loop)

    def recv(self, *args):
        return self.run_with_loop(self.pool.recv(*args), self.loop)

    def __exit__(self, type, value, traceback):
        try:
            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
        finally:
            # Cancel pending tasks before closing the loop;
            # a closed loop cannot run_until_complete the cancellations
            self._cancel_all_tasks(self.loop)
            self.loop.close()

    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
    # for contributors: if any new library that uses asyncio needs to run in non-async code,
    # move these functions out of this class
    @staticmethod
    def run_with_loop(main, loop):
        if not asyncio.iscoroutine(main):
            raise ValueError(f'a coroutine was expected, got {main!r}')

        try:
            return loop.run_until_complete(main)
        finally:
            loop.run_until_complete(loop.shutdown_asyncgens())
            if hasattr(loop, 'shutdown_default_executor'):
                loop.run_until_complete(loop.shutdown_default_executor())

    @staticmethod
    def _cancel_all_tasks(loop):
        to_cancel = asyncio.all_tasks(loop)

        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()

        # XXX: the "loop" argument of asyncio.gather() is removed in Python 3.10+
        loop.run_until_complete(
            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))

        for task in to_cancel:
            if task.cancelled():
                continue
            if task.exception() is not None:
                loop.call_exception_handler({
                    'message': 'unhandled exception during asyncio.run() shutdown',
                    'exception': task.exception(),
                    'task': task,
                })
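# Usage sketch (hypothetical URL; requires the optional `websockets` dependency):
#   ws = WebSocketsWrapper('wss://example.com/socket', headers={'Origin': 'https://example.com'})
#   ws.send('{"event": "subscribe"}')
#   reply = ws.recv()
#   ws.__exit__(None, None, None)  # close explicitly; atexit would otherwise do it at shutdown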


def merge_headers(*dicts):
    """Merge dicts of HTTP headers case-insensitively, with later dicts taking priority"""
    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
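# e.g. merge_headers({'user-agent': 'A'}, {'User-Agent': 'B', 'accept': '*/*'})
#   ->  {'User-Agent': 'B', 'Accept': '*/*'}  (keys are title-cased; last value wins)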


def cached_method(f):
    """Cache the return value of a method per instance and per set of call arguments"""
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        bound_args = signature.bind(self, *args, **kwargs)
        bound_args.apply_defaults()
        key = tuple(bound_args.arguments.values())

        if not hasattr(self, '__cached_method__cache'):
            self.__cached_method__cache = {}
        cache = self.__cached_method__cache.setdefault(f.__name__, {})
        if key not in cache:
            cache[key] = f(self, *args, **kwargs)
        return cache[key]
    return wrapper
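# Usage sketch (hypothetical class):
#   class Page:
#       @cached_method
#       def fetch(self, url):
#           ...  # expensive work; runs once per distinct (self, url), then served from cache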


class classproperty:
    """Like @property, but the getter is called with the class instead of an instance"""

    def __init__(self, func):
        functools.update_wrapper(self, func)
        self.func = func

    def __get__(self, _, cls):
        return self.func(cls)
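# e.g.
#   class Foo:
#       @classproperty
#       def name(cls):
#           return cls.__name__
#   Foo.name  ->  'Foo'  (no instance or call needed; also works via instances)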


class Namespace(types.SimpleNamespace):
    """Immutable namespace"""

    def __iter__(self):
        return iter(self.__dict__.values())

    @property
    def items_(self):
        return self.__dict__.items()
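# e.g. COLORS = Namespace(RED='red', BLUE='blue')
#   COLORS.RED      ->  'red'
#   list(COLORS)    ->  ['red', 'blue']  (iterates over values)
#   COLORS.items_   ->  dict_items([('RED', 'red'), ('BLUE', 'blue')])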


# Deprecated
has_certifi = bool(certifi)
has_websockets = bool(websockets)