import asyncio
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import importlib.util
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
import zlib

from .compat import functools  # isort: split
from .compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
)
from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in urllib.parse.uses_netloc:
            urllib.parse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
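
# Usage sketch (hypothetical namespace URI): xpath_with_ns expands `ns:tag`
# components of a path using the given mapping, e.g.
#   xpath_with_ns('media:song/media:author', {'media': 'http://example.com/ns'})
#   -> '{http://example.com/ns}song/{http://example.com/ns}author'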


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, str):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text
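
# Usage sketch (hypothetical XML): xpath_text falls back to `default` when the
# element or its text is missing, and raises ExtractorError when fatal=True:
#   doc = xml.etree.ElementTree.fromstring('<root><title>foo</title></root>')
#   xpath_text(doc, 'title')                  # -> 'foo'
#   xpath_text(doc, 'missing', default=None)  # -> None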


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
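
# Usage sketch: class matching is token-based, so a class attribute containing
# several class names still matches, e.g. (hypothetical HTML)
#   get_elements_by_class('foo', '<div class="foo bar">x</div><span class="foo">y</span>')
#   -> ['x', 'y']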


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
468 """
469 For the first element with the specified tag in the passed HTML document
470 return its' content (text) and the whole element (html)
471 """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')


class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        html.parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
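
# Usage sketch: clean_html collapses whitespace, turns <br> and paragraph
# boundaries into newlines, strips the remaining tags and decodes entities, e.g.
#   clean_html('<p>foo<br/>bar</p>')  # -> 'foo\nbar'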


class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        if self.ignore_extra:
            return self.raw_decode(s.lstrip())[0]
        return super().decode(s)
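
# Usage sketch: with ignore_extra=True, anything after the first complete JSON
# value is discarded instead of raising, e.g.
#   LenientJSONDecoder(ignore_extra=True).decode('{"a": 1} trailing garbage')  # -> {'a': 1}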


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream. Eg, when using contextlib.redirect_stdout
            with contextlib.suppress(io.UnsupportedOperation):
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    if restricted and is_id is NO_DEFAULT:
        s = unicodedata.normalize('NFKC', s)
        s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
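
# Usage sketch: in restricted mode, timestamps keep their digits but colons
# become underscores, and spaces are replaced, e.g.
#   sanitize_filename('live at 01:02:03', restricted=True)  # -> 'live_at_01_02_03'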


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url, *, scheme='http'):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return f'{scheme}:{url}'
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
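
# Usage sketch:
#   sanitize_url('//example.com/a')       # -> 'http://example.com/a'
#   sanitize_url('httpss://example.com')  # -> 'https://example.com'
#   sanitize_url('rmtpe://example.com')   # -> 'rtmpe://example.com'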


def extract_basic_auth(url):
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = urllib.parse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
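
# Usage sketch: credentials embedded in the URL are moved into a Basic auth
# header value, e.g.
#   extract_basic_auth('http://user:pass@example.com/x')
#   -> ('http://example.com/x', 'Basic dXNlcjpwYXNz')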


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return urllib.request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _iter():
        seen = []  # Do not use set since the items can be unhashable
        for x in iterable:
            if x not in seen:
                seen.append(x)
                yield x

    return _iter() if lazy else list(_iter())
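
# Usage sketch: order-preserving de-duplication, e.g.
#   orderedSet([1, 2, 1, 3, 2])  # -> [1, 2, 3]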


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
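
# Usage sketch: named and numeric entities are decoded; unknown ones are kept
# literally, e.g.
#   unescapeHTML('&amp;')    # -> '&'
#   unescapeHTML('&#x61;')   # -> 'a'
#   unescapeHTML('&bogus;')  # -> '&bogus;'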


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, text=False, **kwargs):
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, **kwargs):
        with cls(*args, **kwargs) as proc:
            stdout, stderr = proc.communicate_or_kill()
            return stdout or '', stderr or '', proc.returncode


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
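
# Usage sketch:
#   timetuple_from_msec(3723004)  # -> Time(hours=1, minutes=2, seconds=3, milliseconds=4)
#   formatSeconds(3661)           # -> '1:01:01'
#   formatSeconds(45, msec=True)  # -> '45.000'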


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    from .update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info

        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg or 'The channel is not currently live', **kwargs)


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = http.client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return super().do_request_(req)

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = urllib.request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        http.client.HTTPConnection, http.client.HTTPSConnection))

    url_components = urllib.parse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return urllib.parse.unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, http.client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or http.client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return urllib.request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = urllib.request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1657
1658 # A 303 must either use GET or HEAD for subsequent request
1659 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1660 if code == 303 and m != 'HEAD':
1661 m = 'GET'
1662 # 301 and 302 redirects are commonly turned into a GET from a POST
1663 # for subsequent requests by browsers, so we'll do the same.
1664 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1665 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1666 if code in (301, 302) and m == 'POST':
1667 m = 'GET'
1668
1669 return urllib.request.Request(
1670 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1671 unverifiable=True, method=m)
1672
1673
1674 def extract_timezone(date_str):
1675 m = re.search(
1676 r'''(?x)
1677 ^.{8,}? # >=8 char non-TZ prefix, if present
1678 (?P<tz>Z| # just the UTC Z, or
1679 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1680 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1681 [ ]? # optional space
1682 (?P<sign>\+|-) # +/-
1683 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1684 $)
1685 ''', date_str)
1686 if not m:
1687 timezone = datetime.timedelta()
1688 else:
1689 date_str = date_str[:-len(m.group('tz'))]
1690 if not m.group('sign'):
1691 timezone = datetime.timedelta()
1692 else:
1693 sign = 1 if m.group('sign') == '+' else -1
1694 timezone = datetime.timedelta(
1695 hours=sign * int(m.group('hours')),
1696 minutes=sign * int(m.group('minutes')))
1697 return timezone, date_str
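# Illustrative behaviour (hand-checked against the regex above):
# >>> extract_timezone('2023-01-01T12:00:00+05:30')
# (datetime.timedelta(seconds=19800), '2023-01-01T12:00:00')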
1698
1699
1700 def parse_iso8601(date_str, delimiter='T', timezone=None):
1701 """ Return a UNIX timestamp from the given date """
1702
1703 if date_str is None:
1704 return None
1705
1706 date_str = re.sub(r'\.[0-9]+', '', date_str)
1707
1708 if timezone is None:
1709 timezone, date_str = extract_timezone(date_str)
1710
1711 with contextlib.suppress(ValueError):
1712 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1713 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1714 return calendar.timegm(dt.timetuple())
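# Examples (values matching the project's test suite):
# >>> parse_iso8601('2014-03-23T23:04:26+0100')
# 1395612266
# >>> parse_iso8601('2014-03-23T22:04:26Z')
# 1395612266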
1715
1716
1717 def date_formats(day_first=True):
1718 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1719
1720
1721 def unified_strdate(date_str, day_first=True):
1722 """Return a string with the date in the format YYYYMMDD"""
1723
1724 if date_str is None:
1725 return None
1726 upload_date = None
1727 # Replace commas
1728 date_str = date_str.replace(',', ' ')
1729 # Remove AM/PM + timezone
1730 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1731 _, date_str = extract_timezone(date_str)
1732
1733 for expression in date_formats(day_first):
1734 with contextlib.suppress(ValueError):
1735 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1736 if upload_date is None:
1737 timetuple = email.utils.parsedate_tz(date_str)
1738 if timetuple:
1739 with contextlib.suppress(ValueError):
1740 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1741 if upload_date is not None:
1742 return str(upload_date)
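# Examples (as exercised by the test suite):
# >>> unified_strdate('December 21, 2010')
# '20101221'
# >>> unified_strdate('1968-12-10')
# '19681210'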
1743
1744
1745 def unified_timestamp(date_str, day_first=True):
1746 if date_str is None:
1747 return None
1748
1749 date_str = re.sub(r'[,|]', '', date_str)
1750
1751 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1752 timezone, date_str = extract_timezone(date_str)
1753
1754 # Remove AM/PM + timezone
1755 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1756
1757 # Remove unrecognized timezones from ISO 8601 alike timestamps
1758 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1759 if m:
1760 date_str = date_str[:-len(m.group('tz'))]
1761
1762 # Python only supports microseconds, so remove nanoseconds
1763 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1764 if m:
1765 date_str = m.group(1)
1766
1767 for expression in date_formats(day_first):
1768 with contextlib.suppress(ValueError):
1769 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1770 return calendar.timegm(dt.timetuple())
1771 timetuple = email.utils.parsedate_tz(date_str)
1772 if timetuple:
1773 return calendar.timegm(timetuple) + pm_delta * 3600
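# e.g. (hand-checked):
# >>> unified_timestamp('December 21, 2010')
# 1292889600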
1774
1775
1776 def determine_ext(url, default_ext='unknown_video'):
1777 if url is None or '.' not in url:
1778 return default_ext
1779 guess = url.partition('?')[0].rpartition('.')[2]
1780 if re.match(r'^[A-Za-z0-9]+$', guess):
1781 return guess
    # Try to extract the extension from URLs like http://example.com/foo/bar.mp4/?download
1783 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1784 return guess.rstrip('/')
1785 else:
1786 return default_ext
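# Examples (hand-checked):
# >>> determine_ext('http://example.com/video.mp4')
# 'mp4'
# >>> determine_ext('http://example.com/foo/bar.mp4/?download')
# 'mp4'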
1787
1788
1789 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1790 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1791
1792
1793 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1794 R"""
1795 Return a datetime object from a string.
1796 Supported format:
1797 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1798
1799 @param format strftime format of DATE
1800 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1801 auto: round to the unit provided in date_str (if applicable).
1802 """
1803 auto_precision = False
1804 if precision == 'auto':
1805 auto_precision = True
1806 precision = 'microsecond'
1807 today = datetime_round(datetime.datetime.utcnow(), precision)
1808 if date_str in ('now', 'today'):
1809 return today
1810 if date_str == 'yesterday':
1811 return today - datetime.timedelta(days=1)
1812 match = re.match(
1813 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1814 date_str)
1815 if match is not None:
1816 start_time = datetime_from_str(match.group('start'), precision, format)
1817 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1818 unit = match.group('unit')
1819 if unit == 'month' or unit == 'year':
1820 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1821 unit = 'day'
1822 else:
1823 if unit == 'week':
1824 unit = 'day'
1825 time *= 7
1826 delta = datetime.timedelta(**{unit + 's': time})
1827 new_date = start_time + delta
1828 if auto_precision:
1829 return datetime_round(new_date, unit)
1830 return new_date
1831
1832 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1833
1834
1835 def date_from_str(date_str, format='%Y%m%d', strict=False):
1836 R"""
1837 Return a date object from a string using datetime_from_str
1838
1839 @param strict Restrict allowed patterns to "YYYYMMDD" and
1840 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1841 """
1842 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1843 raise ValueError(f'Invalid date format "{date_str}"')
1844 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1845
1846
1847 def datetime_add_months(dt, months):
1848 """Increment/Decrement a datetime object by months."""
1849 month = dt.month + months - 1
1850 year = dt.year + month // 12
1851 month = month % 12 + 1
1852 day = min(dt.day, calendar.monthrange(year, month)[1])
1853 return dt.replace(year, month, day)
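# The day is clamped to the length of the target month, e.g.:
# >>> datetime_add_months(datetime.datetime(2020, 1, 31), 1)
# datetime.datetime(2020, 2, 29, 0, 0)
# >>> datetime_add_months(datetime.datetime(2020, 1, 31), -2)
# datetime.datetime(2019, 11, 30, 0, 0)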
1854
1855
1856 def datetime_round(dt, precision='day'):
1857 """
1858 Round a datetime object's time to a specific precision
1859 """
1860 if precision == 'microsecond':
1861 return dt
1862
1863 unit_seconds = {
1864 'day': 86400,
1865 'hour': 3600,
1866 'minute': 60,
1867 'second': 1,
1868 }
1869 roundto = lambda x, n: ((x + n / 2) // n) * n
1870 timestamp = calendar.timegm(dt.timetuple())
1871 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1872
1873
1874 def hyphenate_date(date_str):
1875 """
1876 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1877 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1878 if match is not None:
1879 return '-'.join(match.groups())
1880 else:
1881 return date_str
1882
1883
1884 class DateRange:
1885 """Represents a time interval between two dates"""
1886
1887 def __init__(self, start=None, end=None):
1888 """start and end must be strings in the format accepted by date"""
1889 if start is not None:
1890 self.start = date_from_str(start, strict=True)
1891 else:
1892 self.start = datetime.datetime.min.date()
1893 if end is not None:
1894 self.end = date_from_str(end, strict=True)
1895 else:
1896 self.end = datetime.datetime.max.date()
1897 if self.start > self.end:
            raise ValueError(f'Date range: "{self}": the start date must be before the end date')
1899
1900 @classmethod
1901 def day(cls, day):
1902 """Returns a range that only contains the given day"""
1903 return cls(day, day)
1904
1905 def __contains__(self, date):
1906 """Check if the date is in the range"""
1907 if not isinstance(date, datetime.date):
1908 date = date_from_str(date)
1909 return self.start <= date <= self.end
1910
1911 def __str__(self):
1912 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1913
1914 def __eq__(self, other):
1915 return (isinstance(other, DateRange)
1916 and self.start == other.start and self.end == other.end)
1917
1918
1919 def platform_name():
1920 """ Returns the platform name as a str """
1921 write_string('DeprecationWarning: yt_dlp.utils.platform_name is deprecated, use platform.platform instead')
1922 return platform.platform()
1923
1924
1925 @functools.cache
1926 def system_identifier():
1927 python_implementation = platform.python_implementation()
1928 if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
1929 python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
1930
1931 return 'Python %s (%s %s) - %s %s' % (
1932 platform.python_version(),
1933 python_implementation,
1934 platform.architecture()[0],
1935 platform.platform(),
1936 format_field(join_nonempty(*platform.libc_ver(), delim=' '), None, '(%s)'),
1937 )
1938
1939
1940 @functools.cache
1941 def get_windows_version():
    ''' Get the Windows version as a tuple. Returns () if not running on Windows '''
1943 if compat_os_name == 'nt':
1944 return version_tuple(platform.win32_ver()[1])
1945 else:
1946 return ()
1947
1948
1949 def write_string(s, out=None, encoding=None):
1950 assert isinstance(s, str)
1951 out = out or sys.stderr
1952
1953 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1954 s = re.sub(r'([\r\n]+)', r' \1', s)
1955
1956 enc, buffer = None, out
1957 if 'b' in getattr(out, 'mode', ''):
1958 enc = encoding or preferredencoding()
1959 elif hasattr(out, 'buffer'):
1960 buffer = out.buffer
1961 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1962
1963 buffer.write(s.encode(enc, 'ignore') if enc else s)
1964 out.flush()
1965
1966
1967 def bytes_to_intlist(bs):
1968 if not bs:
1969 return []
    if isinstance(bs[0], int):  # bytes or bytearray
        return list(bs)
    else:  # str
1973 return [ord(c) for c in bs]
1974
1975
1976 def intlist_to_bytes(xs):
1977 if not xs:
1978 return b''
1979 return struct.pack('%dB' % len(xs), *xs)
1980
1981
1982 class LockingUnsupportedError(OSError):
1983 msg = 'File locking is not supported'
1984
1985 def __init__(self):
1986 super().__init__(self.msg)
1987
1988
1989 # Cross-platform file locking
1990 if sys.platform == 'win32':
1991 import ctypes
1992 import ctypes.wintypes
1993 import msvcrt
1994
1995 class OVERLAPPED(ctypes.Structure):
1996 _fields_ = [
1997 ('Internal', ctypes.wintypes.LPVOID),
1998 ('InternalHigh', ctypes.wintypes.LPVOID),
1999 ('Offset', ctypes.wintypes.DWORD),
2000 ('OffsetHigh', ctypes.wintypes.DWORD),
2001 ('hEvent', ctypes.wintypes.HANDLE),
2002 ]
2003
2004 kernel32 = ctypes.windll.kernel32
2005 LockFileEx = kernel32.LockFileEx
2006 LockFileEx.argtypes = [
2007 ctypes.wintypes.HANDLE, # hFile
2008 ctypes.wintypes.DWORD, # dwFlags
2009 ctypes.wintypes.DWORD, # dwReserved
2010 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2011 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2012 ctypes.POINTER(OVERLAPPED) # Overlapped
2013 ]
2014 LockFileEx.restype = ctypes.wintypes.BOOL
2015 UnlockFileEx = kernel32.UnlockFileEx
2016 UnlockFileEx.argtypes = [
2017 ctypes.wintypes.HANDLE, # hFile
2018 ctypes.wintypes.DWORD, # dwReserved
2019 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2020 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2021 ctypes.POINTER(OVERLAPPED) # Overlapped
2022 ]
2023 UnlockFileEx.restype = ctypes.wintypes.BOOL
2024 whole_low = 0xffffffff
2025 whole_high = 0x7fffffff
2026
2027 def _lock_file(f, exclusive, block):
2028 overlapped = OVERLAPPED()
2029 overlapped.Offset = 0
2030 overlapped.OffsetHigh = 0
2031 overlapped.hEvent = 0
2032 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2033
2034 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2035 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2036 0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2038 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2039
2040 def _unlock_file(f):
2041 assert f._lock_file_overlapped_p
2042 handle = msvcrt.get_osfhandle(f.fileno())
2043 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2044 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2045
2046 else:
2047 try:
2048 import fcntl
2049
2050 def _lock_file(f, exclusive, block):
2051 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2052 if not block:
2053 flags |= fcntl.LOCK_NB
2054 try:
2055 fcntl.flock(f, flags)
2056 except BlockingIOError:
2057 raise
2058 except OSError: # AOSP does not have flock()
2059 fcntl.lockf(f, flags)
2060
2061 def _unlock_file(f):
2062 try:
2063 fcntl.flock(f, fcntl.LOCK_UN)
2064 except OSError:
2065 fcntl.lockf(f, fcntl.LOCK_UN)
2066
2067 except ImportError:
2068
2069 def _lock_file(f, exclusive, block):
2070 raise LockingUnsupportedError()
2071
2072 def _unlock_file(f):
2073 raise LockingUnsupportedError()
2074
2075
2076 class locked_file:
2077 locked = False
2078
2079 def __init__(self, filename, mode, block=True, encoding=None):
2080 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2081 raise NotImplementedError(mode)
2082 self.mode, self.block = mode, block
2083
2084 writable = any(f in mode for f in 'wax+')
2085 readable = any(f in mode for f in 'r+')
2086 flags = functools.reduce(operator.ior, (
2087 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2088 getattr(os, 'O_BINARY', 0), # Windows only
2089 getattr(os, 'O_NOINHERIT', 0), # Windows only
2090 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2091 os.O_APPEND if 'a' in mode else 0,
2092 os.O_EXCL if 'x' in mode else 0,
2093 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2094 ))
2095
2096 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2097
2098 def __enter__(self):
2099 exclusive = 'r' not in self.mode
2100 try:
2101 _lock_file(self.f, exclusive, self.block)
2102 self.locked = True
2103 except OSError:
2104 self.f.close()
2105 raise
2106 if 'w' in self.mode:
2107 try:
2108 self.f.truncate()
2109 except OSError as e:
2110 if e.errno not in (
2111 errno.ESPIPE, # Illegal seek - expected for FIFO
2112 errno.EINVAL, # Invalid argument - expected for /dev/null
2113 ):
2114 raise
2115 return self
2116
2117 def unlock(self):
2118 if not self.locked:
2119 return
2120 try:
2121 _unlock_file(self.f)
2122 finally:
2123 self.locked = False
2124
2125 def __exit__(self, *_):
2126 try:
2127 self.unlock()
2128 finally:
2129 self.f.close()
2130
2131 open = __enter__
2132 close = __exit__
2133
2134 def __getattr__(self, attr):
2135 return getattr(self.f, attr)
2136
2137 def __iter__(self):
2138 return iter(self.f)
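# Usage sketch (hypothetical filename), e.g. to serialize writers across processes:
# with locked_file('download-archive.txt', 'a', block=True) as f:
#     f.write('youtube abc123\n')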
2139
2140
2141 @functools.cache
2142 def get_filesystem_encoding():
2143 encoding = sys.getfilesystemencoding()
2144 return encoding if encoding is not None else 'utf-8'
2145
2146
2147 def shell_quote(args):
2148 quoted_args = []
2149 encoding = get_filesystem_encoding()
2150 for a in args:
2151 if isinstance(a, bytes):
2152 # We may get a filename encoded with 'encodeFilename'
2153 a = a.decode(encoding)
2154 quoted_args.append(compat_shlex_quote(a))
2155 return ' '.join(quoted_args)
2156
2157
2158 def smuggle_url(url, data):
2159 """ Pass additional data in a URL for internal use. """
2160
2161 url, idata = unsmuggle_url(url, {})
2162 data.update(idata)
2163 sdata = urllib.parse.urlencode(
2164 {'__youtubedl_smuggle': json.dumps(data)})
2165 return url + '#' + sdata
2166
2167
2168 def unsmuggle_url(smug_url, default=None):
2169 if '#__youtubedl_smuggle' not in smug_url:
2170 return smug_url, default
2171 url, _, sdata = smug_url.rpartition('#')
2172 jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2173 data = json.loads(jsond)
2174 return url, data
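# Round-trip example (hypothetical URL and payload):
# >>> unsmuggle_url(smuggle_url('http://example.com/v', {'referer': 'x'}))
# ('http://example.com/v', {'referer': 'x'})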
2175
2176
2177 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2178 """ Formats numbers with decimal sufixes like K, M, etc """
2179 num, factor = float_or_none(num), float(factor)
2180 if num is None or num < 0:
2181 return None
2182 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2183 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2184 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2185 if factor == 1024:
2186 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2187 converted = num / (factor ** exponent)
2188 return fmt % (converted, suffix)
2189
2190
2191 def format_bytes(bytes):
2192 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2193
2194
2195 def lookup_unit_table(unit_table, s):
2196 units_re = '|'.join(re.escape(u) for u in unit_table)
2197 m = re.match(
2198 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2199 if not m:
2200 return None
2201 num_str = m.group('num').replace(',', '.')
2202 mult = unit_table[m.group('unit')]
2203 return int(float(num_str) * mult)
2204
2205
2206 def parse_filesize(s):
2207 if s is None:
2208 return None
2209
2210 # The lower-case forms are of course incorrect and unofficial,
2211 # but we support those too
2212 _UNIT_TABLE = {
2213 'B': 1,
2214 'b': 1,
2215 'bytes': 1,
2216 'KiB': 1024,
2217 'KB': 1000,
2218 'kB': 1024,
2219 'Kb': 1000,
2220 'kb': 1000,
2221 'kilobytes': 1000,
2222 'kibibytes': 1024,
2223 'MiB': 1024 ** 2,
2224 'MB': 1000 ** 2,
2225 'mB': 1024 ** 2,
2226 'Mb': 1000 ** 2,
2227 'mb': 1000 ** 2,
2228 'megabytes': 1000 ** 2,
2229 'mebibytes': 1024 ** 2,
2230 'GiB': 1024 ** 3,
2231 'GB': 1000 ** 3,
2232 'gB': 1024 ** 3,
2233 'Gb': 1000 ** 3,
2234 'gb': 1000 ** 3,
2235 'gigabytes': 1000 ** 3,
2236 'gibibytes': 1024 ** 3,
2237 'TiB': 1024 ** 4,
2238 'TB': 1000 ** 4,
2239 'tB': 1024 ** 4,
2240 'Tb': 1000 ** 4,
2241 'tb': 1000 ** 4,
2242 'terabytes': 1000 ** 4,
2243 'tebibytes': 1024 ** 4,
2244 'PiB': 1024 ** 5,
2245 'PB': 1000 ** 5,
2246 'pB': 1024 ** 5,
2247 'Pb': 1000 ** 5,
2248 'pb': 1000 ** 5,
2249 'petabytes': 1000 ** 5,
2250 'pebibytes': 1024 ** 5,
2251 'EiB': 1024 ** 6,
2252 'EB': 1000 ** 6,
2253 'eB': 1024 ** 6,
2254 'Eb': 1000 ** 6,
2255 'eb': 1000 ** 6,
2256 'exabytes': 1000 ** 6,
2257 'exbibytes': 1024 ** 6,
2258 'ZiB': 1024 ** 7,
2259 'ZB': 1000 ** 7,
2260 'zB': 1024 ** 7,
2261 'Zb': 1000 ** 7,
2262 'zb': 1000 ** 7,
2263 'zettabytes': 1000 ** 7,
2264 'zebibytes': 1024 ** 7,
2265 'YiB': 1024 ** 8,
2266 'YB': 1000 ** 8,
2267 'yB': 1024 ** 8,
2268 'Yb': 1000 ** 8,
2269 'yb': 1000 ** 8,
2270 'yottabytes': 1000 ** 8,
2271 'yobibytes': 1024 ** 8,
2272 }
2273
2274 return lookup_unit_table(_UNIT_TABLE, s)
2275
2276
2277 def parse_count(s):
2278 if s is None:
2279 return None
2280
2281 s = re.sub(r'^[^\d]+\s', '', s).strip()
2282
2283 if re.match(r'^[\d,.]+$', s):
2284 return str_to_int(s)
2285
2286 _UNIT_TABLE = {
2287 'k': 1000,
2288 'K': 1000,
2289 'm': 1000 ** 2,
2290 'M': 1000 ** 2,
2291 'kk': 1000 ** 2,
2292 'KK': 1000 ** 2,
2293 'b': 1000 ** 3,
2294 'B': 1000 ** 3,
2295 }
2296
2297 ret = lookup_unit_table(_UNIT_TABLE, s)
2298 if ret is not None:
2299 return ret
2300
2301 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2302 if mobj:
2303 return str_to_int(mobj.group(1))
2304
2305
2306 def parse_resolution(s, *, lenient=False):
2307 if s is None:
2308 return {}
2309
2310 if lenient:
2311 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2312 else:
2313 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2314 if mobj:
2315 return {
2316 'width': int(mobj.group('w')),
2317 'height': int(mobj.group('h')),
2318 }
2319
2320 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2321 if mobj:
2322 return {'height': int(mobj.group(1))}
2323
2324 mobj = re.search(r'\b([48])[kK]\b', s)
2325 if mobj:
2326 return {'height': int(mobj.group(1)) * 540}
2327
2328 return {}
2329
2330
2331 def parse_bitrate(s):
2332 if not isinstance(s, str):
2333 return
2334 mobj = re.search(r'\b(\d+)\s*kbps', s)
2335 if mobj:
2336 return int(mobj.group(1))
2337
2338
2339 def month_by_name(name, lang='en'):
2340 """ Return the number of a month by (locale-independently) English name """
2341
2342 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2343
2344 try:
2345 return month_names.index(name) + 1
2346 except ValueError:
2347 return None
2348
2349
2350 def month_by_abbreviation(abbrev):
2351 """ Return the number of a month by (locale-independently) English
2352 abbreviations """
2353
2354 try:
2355 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2356 except ValueError:
2357 return None
2358
2359
2360 def fix_xml_ampersands(xml_str):
2361 """Replace all the '&' by '&amp;' in XML"""
2362 return re.sub(
2363 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2364 '&amp;',
2365 xml_str)
2366
2367
2368 def setproctitle(title):
2369 assert isinstance(title, str)
2370
2371 # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2372 try:
2373 import ctypes
2374 except ImportError:
2375 return
2376
2377 try:
2378 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2379 except OSError:
2380 return
2381 except TypeError:
        # Historical: LoadLibrary on Windows Python 2.7.13 only accepted a
        # bytestring, so unicode_literals made calls with str fail; kept
        # for safety, though it should not trigger on Python 3
2385 return
2386 title_bytes = title.encode()
2387 buf = ctypes.create_string_buffer(len(title_bytes))
2388 buf.value = title_bytes
2389 try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
2391 except AttributeError:
2392 return # Strange libc, just skip this
2393
2394
2395 def remove_start(s, start):
2396 return s[len(start):] if s is not None and s.startswith(start) else s
2397
2398
2399 def remove_end(s, end):
2400 return s[:-len(end)] if s is not None and s.endswith(end) else s
2401
2402
2403 def remove_quotes(s):
2404 if s is None or len(s) < 2:
2405 return s
2406 for quote in ('"', "'", ):
2407 if s[0] == quote and s[-1] == quote:
2408 return s[1:-1]
2409 return s
2410
2411
2412 def get_domain(url):
2413 """
2414 This implementation is inconsistent, but is kept for compatibility.
2415 Use this only for "webpage_url_domain"
2416 """
2417 return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2418
2419
2420 def url_basename(url):
2421 path = urllib.parse.urlparse(url).path
2422 return path.strip('/').split('/')[-1]
2423
2424
2425 def base_url(url):
2426 return re.match(r'https?://[^?#&]+/', url).group()
2427
2428
2429 def urljoin(base, path):
2430 if isinstance(path, bytes):
2431 path = path.decode()
2432 if not isinstance(path, str) or not path:
2433 return None
2434 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2435 return path
2436 if isinstance(base, bytes):
2437 base = base.decode()
2438 if not isinstance(base, str) or not re.match(
2439 r'^(?:https?:)?//', base):
2440 return None
2441 return urllib.parse.urljoin(base, path)
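# Examples (hand-checked):
# >>> urljoin('https://example.com/a/b', 'c/d')
# 'https://example.com/a/c/d'
# >>> urljoin('https://example.com/a/', '//cdn.example.com/x')
# '//cdn.example.com/x'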
2442
2443
2444 class HEADRequest(urllib.request.Request):
2445 def get_method(self):
2446 return 'HEAD'
2447
2448
2449 class PUTRequest(urllib.request.Request):
2450 def get_method(self):
2451 return 'PUT'
2452
2453
2454 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2455 if get_attr and v is not None:
2456 v = getattr(v, get_attr, None)
2457 try:
2458 return int(v) * invscale // scale
2459 except (ValueError, TypeError, OverflowError):
2460 return default
2461
2462
2463 def str_or_none(v, default=None):
2464 return default if v is None else str(v)
2465
2466
2467 def str_to_int(int_str):
2468 """ A more relaxed version of int_or_none """
2469 if isinstance(int_str, int):
2470 return int_str
2471 elif isinstance(int_str, str):
2472 int_str = re.sub(r'[,\.\+]', '', int_str)
2473 return int_or_none(int_str)
2474
2475
2476 def float_or_none(v, scale=1, invscale=1, default=None):
2477 if v is None:
2478 return default
2479 try:
2480 return float(v) * invscale / scale
2481 except (ValueError, TypeError):
2482 return default
2483
2484
2485 def bool_or_none(v, default=None):
2486 return v if isinstance(v, bool) else default
2487
2488
2489 def strip_or_none(v, default=None):
2490 return v.strip() if isinstance(v, str) else default
2491
2492
2493 def url_or_none(url):
2494 if not url or not isinstance(url, str):
2495 return None
2496 url = url.strip()
2497 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2498
2499
2500 def request_to_url(req):
2501 if isinstance(req, urllib.request.Request):
2502 return req.get_full_url()
2503 else:
2504 return req
2505
2506
2507 def strftime_or_none(timestamp, date_format, default=None):
2508 datetime_object = None
2509 try:
2510 if isinstance(timestamp, (int, float)): # unix timestamp
2511 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2512 elif isinstance(timestamp, str): # assume YYYYMMDD
2513 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2514 return datetime_object.strftime(date_format)
2515 except (ValueError, TypeError, AttributeError):
2516 return default
2517
2518
2519 def parse_duration(s):
2520 if not isinstance(s, str):
2521 return None
2522 s = s.strip()
2523 if not s:
2524 return None
2525
2526 days, hours, mins, secs, ms = [None] * 5
2527 m = re.match(r'''(?x)
2528 (?P<before_secs>
2529 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2530 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2531 (?P<ms>[.:][0-9]+)?Z?$
2532 ''', s)
2533 if m:
2534 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2535 else:
2536 m = re.match(
2537 r'''(?ix)(?:P?
2538 (?:
2539 [0-9]+\s*y(?:ears?)?,?\s*
2540 )?
2541 (?:
2542 [0-9]+\s*m(?:onths?)?,?\s*
2543 )?
2544 (?:
2545 [0-9]+\s*w(?:eeks?)?,?\s*
2546 )?
2547 (?:
2548 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2549 )?
2550 T)?
2551 (?:
2552 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2553 )?
2554 (?:
2555 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2556 )?
2557 (?:
2558 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2559 )?Z?$''', s)
2560 if m:
2561 days, hours, mins, secs, ms = m.groups()
2562 else:
2563 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2564 if m:
2565 hours, mins = m.groups()
2566 else:
2567 return None
2568
2569 if ms:
2570 ms = ms.replace(':', '.')
2571 return sum(float(part or 0) * mult for part, mult in (
2572 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
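# Examples (hand-checked against the regexes above):
# >>> parse_duration('9:12:43')
# 33163.0
# >>> parse_duration('PT1H30M')
# 5400.0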
2573
2574
2575 def prepend_extension(filename, ext, expected_real_ext=None):
2576 name, real_ext = os.path.splitext(filename)
2577 return (
2578 f'{name}.{ext}{real_ext}'
2579 if not expected_real_ext or real_ext[1:] == expected_real_ext
2580 else f'{filename}.{ext}')
2581
2582
2583 def replace_extension(filename, ext, expected_real_ext=None):
2584 name, real_ext = os.path.splitext(filename)
2585 return '{}.{}'.format(
2586 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2587 ext)
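# Examples for both helpers (hand-checked):
# >>> prepend_extension('video.mp4', 'temp')
# 'video.temp.mp4'
# >>> replace_extension('video.webm', 'mp4')
# 'video.mp4'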
2588
2589
2590 def check_executable(exe, args=[]):
2591 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2592 args can be a list of arguments for a short output (like -version) """
2593 try:
2594 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2595 except OSError:
2596 return False
2597 return exe
2598
2599
2600 def _get_exe_version_output(exe, args, *, to_screen=None):
2601 if to_screen:
2602 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2603 try:
2604 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2605 # SIGTTOU if yt-dlp is run in the background.
2606 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2607 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2608 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2609 except OSError:
2610 return False
2611 return stdout
2612
2613
2614 def detect_exe_version(output, version_re=None, unrecognized='present'):
2615 assert isinstance(output, str)
2616 if version_re is None:
2617 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2618 m = re.search(version_re, output)
2619 if m:
2620 return m.group(1)
2621 else:
2622 return unrecognized
2623
2624
2625 def get_exe_version(exe, args=['--version'],
2626 version_re=None, unrecognized='present'):
2627 """ Returns the version of the specified executable,
2628 or False if the executable is not present """
2629 out = _get_exe_version_output(exe, args)
2630 return detect_exe_version(out, version_re, unrecognized) if out else False
2631
2632
2633 def frange(start=0, stop=None, step=1):
2634 """Float range"""
2635 if stop is None:
2636 start, stop = 0, start
2637 sign = [-1, 1][step > 0] if step else 0
2638 while sign * start < sign * stop:
2639 yield start
2640 start += step
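# e.g.:
# >>> list(frange(5))
# [0, 1, 2, 3, 4]
# >>> list(frange(0, 1, 0.25))
# [0, 0.25, 0.5, 0.75]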
2641
2642
2643 class LazyList(collections.abc.Sequence):
2644 """Lazy immutable list from an iterable
2645 Note that slices of a LazyList are lists and not LazyList"""
2646
2647 class IndexError(IndexError):
2648 pass
2649
2650 def __init__(self, iterable, *, reverse=False, _cache=None):
2651 self._iterable = iter(iterable)
2652 self._cache = [] if _cache is None else _cache
2653 self._reversed = reverse
2654
2655 def __iter__(self):
2656 if self._reversed:
2657 # We need to consume the entire iterable to iterate in reverse
2658 yield from self.exhaust()
2659 return
2660 yield from self._cache
2661 for item in self._iterable:
2662 self._cache.append(item)
2663 yield item
2664
2665 def _exhaust(self):
2666 self._cache.extend(self._iterable)
2667 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2668 return self._cache
2669
2670 def exhaust(self):
2671 """Evaluate the entire iterable"""
2672 return self._exhaust()[::-1 if self._reversed else 1]
2673
2674 @staticmethod
2675 def _reverse_index(x):
2676 return None if x is None else ~x
2677
2678 def __getitem__(self, idx):
2679 if isinstance(idx, slice):
2680 if self._reversed:
2681 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2682 start, stop, step = idx.start, idx.stop, idx.step or 1
2683 elif isinstance(idx, int):
2684 if self._reversed:
2685 idx = self._reverse_index(idx)
2686 start, stop, step = idx, idx, 0
2687 else:
2688 raise TypeError('indices must be integers or slices')
2689 if ((start or 0) < 0 or (stop or 0) < 0
2690 or (start is None and step < 0)
2691 or (stop is None and step > 0)):
2692 # We need to consume the entire iterable to be able to slice from the end
2693 # Obviously, never use this with infinite iterables
2694 self._exhaust()
2695 try:
2696 return self._cache[idx]
2697 except IndexError as e:
2698 raise self.IndexError(e) from e
2699 n = max(start or 0, stop or 0) - len(self._cache) + 1
2700 if n > 0:
2701 self._cache.extend(itertools.islice(self._iterable, n))
2702 try:
2703 return self._cache[idx]
2704 except IndexError as e:
2705 raise self.IndexError(e) from e
2706
2707 def __bool__(self):
2708 try:
2709 self[-1] if self._reversed else self[0]
2710 except self.IndexError:
2711 return False
2712 return True
2713
2714 def __len__(self):
2715 self._exhaust()
2716 return len(self._cache)
2717
2718 def __reversed__(self):
2719 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2720
2721 def __copy__(self):
2722 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2723
2724 def __repr__(self):
2725 # repr and str should mimic a list. So we exhaust the iterable
2726 return repr(self.exhaust())
2727
2728 def __str__(self):
2729 return repr(self.exhaust())
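# LazyList consumes the underlying iterable only as far as needed, e.g.:
# >>> l = LazyList(itertools.count())
# >>> l[4]  # pulls just the first five items into the cache
# 4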
2730
2731
2732 class PagedList:
2733
2734 class IndexError(IndexError):
2735 pass
2736
2737 def __len__(self):
2738 # This is only useful for tests
2739 return len(self.getslice())
2740
2741 def __init__(self, pagefunc, pagesize, use_cache=True):
2742 self._pagefunc = pagefunc
2743 self._pagesize = pagesize
2744 self._pagecount = float('inf')
2745 self._use_cache = use_cache
2746 self._cache = {}
2747
2748 def getpage(self, pagenum):
2749 page_results = self._cache.get(pagenum)
2750 if page_results is None:
2751 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2752 if self._use_cache:
2753 self._cache[pagenum] = page_results
2754 return page_results
2755
2756 def getslice(self, start=0, end=None):
2757 return list(self._getslice(start, end))
2758
2759 def _getslice(self, start, end):
2760 raise NotImplementedError('This method must be implemented by subclasses')
2761
2762 def __getitem__(self, idx):
2763 assert self._use_cache, 'Indexing PagedList requires cache'
2764 if not isinstance(idx, int) or idx < 0:
2765 raise TypeError('indices must be non-negative integers')
2766 entries = self.getslice(idx, idx + 1)
2767 if not entries:
2768 raise self.IndexError()
2769 return entries[0]
2770
2771
2772 class OnDemandPagedList(PagedList):
2773 """Download pages until a page with less than maximum results"""
2774
2775 def _getslice(self, start, end):
2776 for pagenum in itertools.count(start // self._pagesize):
2777 firstid = pagenum * self._pagesize
2778 nextfirstid = pagenum * self._pagesize + self._pagesize
2779 if start >= nextfirstid:
2780 continue
2781
2782 startv = (
2783 start % self._pagesize
2784 if firstid <= start < nextfirstid
2785 else 0)
2786 endv = (
2787 ((end - 1) % self._pagesize) + 1
2788 if (end is not None and firstid <= end <= nextfirstid)
2789 else None)
2790
2791 try:
2792 page_results = self.getpage(pagenum)
2793 except Exception:
2794 self._pagecount = pagenum - 1
2795 raise
2796 if startv != 0 or endv is not None:
2797 page_results = page_results[startv:endv]
2798 yield from page_results
2799
            # A little optimization: if the current page is not "full", i.e. it
            # contains fewer than page_size videos, we can assume it is the last
            # one, so there is no need to query further pages.
2804 if len(page_results) + startv < self._pagesize:
2805 break
2806
2807 # If we got the whole page, but the next page is not interesting,
2808 # break out early as well
2809 if end == nextfirstid:
2810 break
2811
2812
2813 class InAdvancePagedList(PagedList):
2814 """PagedList with total number of pages known in advance"""
2815
2816 def __init__(self, pagefunc, pagecount, pagesize):
2817 PagedList.__init__(self, pagefunc, pagesize, True)
2818 self._pagecount = pagecount
2819
2820 def _getslice(self, start, end):
2821 start_page = start // self._pagesize
2822 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2823 skip_elems = start - start_page * self._pagesize
2824 only_more = None if end is None else end - start
2825 for pagenum in range(start_page, end_page):
2826 page_results = self.getpage(pagenum)
2827 if skip_elems:
2828 page_results = page_results[skip_elems:]
2829 skip_elems = None
2830 if only_more is not None:
2831 if len(page_results) < only_more:
2832 only_more -= len(page_results)
2833 else:
2834 yield from page_results[:only_more]
2835 break
2836 yield from page_results
2837
2838
2839 class PlaylistEntries:
2840 MissingEntry = object()
2841 is_exhausted = False
2842
2843 def __init__(self, ydl, info_dict):
2844 self.ydl = ydl
2845
2846 # _entries must be assigned now since infodict can change during iteration
2847 entries = info_dict.get('entries')
2848 if entries is None:
2849 raise EntryNotInPlaylist('There are no entries')
2850 elif isinstance(entries, list):
2851 self.is_exhausted = True
2852
2853 requested_entries = info_dict.get('requested_entries')
2854 self.is_incomplete = bool(requested_entries)
2855 if self.is_incomplete:
2856 assert self.is_exhausted
2857 self._entries = [self.MissingEntry] * max(requested_entries)
2858 for i, entry in zip(requested_entries, entries):
2859 self._entries[i - 1] = entry
2860 elif isinstance(entries, (list, PagedList, LazyList)):
2861 self._entries = entries
2862 else:
2863 self._entries = LazyList(entries)
2864
2865 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2866 (?P<start>[+-]?\d+)?
2867 (?P<range>[:-]
2868 (?P<end>[+-]?\d+|inf(?:inite)?)?
2869 (?::(?P<step>[+-]?\d+))?
2870 )?''')
2871
2872 @classmethod
2873 def parse_playlist_items(cls, string):
2874 for segment in string.split(','):
2875 if not segment:
                raise ValueError('There are two or more consecutive commas')
2877 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2878 if not mobj:
2879 raise ValueError(f'{segment!r} is not a valid specification')
2880 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2881 if int_or_none(step) == 0:
2882 raise ValueError(f'Step in {segment!r} cannot be zero')
2883 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2884
2885 def get_requested_items(self):
2886 playlist_items = self.ydl.params.get('playlist_items')
2887 playlist_start = self.ydl.params.get('playliststart', 1)
2888 playlist_end = self.ydl.params.get('playlistend')
2889 # For backwards compatibility, interpret -1 as whole list
2890 if playlist_end in (-1, None):
2891 playlist_end = ''
2892 if not playlist_items:
2893 playlist_items = f'{playlist_start}:{playlist_end}'
2894 elif playlist_start != 1 or playlist_end:
2895 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2896
2897 for index in self.parse_playlist_items(playlist_items):
2898 for i, entry in self[index]:
2899 yield i, entry
2900 if not entry:
2901 continue
2902 try:
2903 # TODO: Add auto-generated fields
2904 self.ydl._match_entry(entry, incomplete=True, silent=True)
2905 except (ExistingVideoReached, RejectedVideoReached):
2906 return
2907
2908 def get_full_count(self):
2909 if self.is_exhausted and not self.is_incomplete:
2910 return len(self)
2911 elif isinstance(self._entries, InAdvancePagedList):
2912 if self._entries._pagesize == 1:
2913 return self._entries._pagecount
2914
2915 @functools.cached_property
2916 def _getter(self):
2917 if isinstance(self._entries, list):
2918 def get_entry(i):
2919 try:
2920 entry = self._entries[i]
2921 except IndexError:
2922 entry = self.MissingEntry
2923 if not self.is_incomplete:
2924 raise self.IndexError()
2925 if entry is self.MissingEntry:
2926 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2927 return entry
2928 else:
2929 def get_entry(i):
2930 try:
2931 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
2932 except (LazyList.IndexError, PagedList.IndexError):
2933 raise self.IndexError()
2934 return get_entry
2935
2936 def __getitem__(self, idx):
2937 if isinstance(idx, int):
2938 idx = slice(idx, idx)
2939
2940 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2941 step = 1 if idx.step is None else idx.step
2942 if idx.start is None:
2943 start = 0 if step > 0 else len(self) - 1
2944 else:
2945 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
2946
2947 # NB: Do not call len(self) when idx == [:]
2948 if idx.stop is None:
2949 stop = 0 if step < 0 else float('inf')
2950 else:
2951 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
2952 stop += [-1, 1][step > 0]
2953
2954 for i in frange(start, stop, step):
2955 if i < 0:
2956 continue
2957 try:
2958 entry = self._getter(i)
2959 except self.IndexError:
2960 self.is_exhausted = True
2961 if step > 0:
2962 break
2963 continue
2964 yield i + 1, entry
2965
2966 def __len__(self):
2967 return len(tuple(self[:]))
2968
2969 class IndexError(IndexError):
2970 pass
2971
2972
2973 def uppercase_escape(s):
2974 unicode_escape = codecs.getdecoder('unicode_escape')
2975 return re.sub(
2976 r'\\U[0-9a-fA-F]{8}',
2977 lambda m: unicode_escape(m.group(0))[0],
2978 s)
2979
2980
2981 def lowercase_escape(s):
2982 unicode_escape = codecs.getdecoder('unicode_escape')
2983 return re.sub(
2984 r'\\u[0-9a-fA-F]{4}',
2985 lambda m: unicode_escape(m.group(0))[0],
2986 s)
2987
2988
2989 def escape_rfc3986(s):
2990 """Escape non-ASCII characters as suggested by RFC 3986"""
2991 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2992
2993
2994 def escape_url(url):
2995 """Escape URL as suggested by RFC 3986"""
2996 url_parsed = urllib.parse.urlparse(url)
2997 return url_parsed._replace(
2998 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2999 path=escape_rfc3986(url_parsed.path),
3000 params=escape_rfc3986(url_parsed.params),
3001 query=escape_rfc3986(url_parsed.query),
3002 fragment=escape_rfc3986(url_parsed.fragment)
3003 ).geturl()
3004
3005
3006 def parse_qs(url):
3007 return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
3008
3009
3010 def read_batch_urls(batch_fd):
3011 def fixup(url):
3012 if not isinstance(url, str):
3013 url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')  # mojibake and properly-decoded forms of the UTF-8 BOM
3015 for bom in BOM_UTF8:
3016 if url.startswith(bom):
3017 url = url[len(bom):]
3018 url = url.lstrip()
3019 if not url or url.startswith(('#', ';', ']')):
3020 return False
3021 # "#" cannot be stripped out since it is part of the URI
3022 # However, it can be safely stripped out if following a whitespace
3023 return re.split(r'\s#', url, 1)[0].rstrip()
3024
3025 with contextlib.closing(batch_fd) as fd:
3026 return [url for url in map(fixup, fd) if url]
3027
3028
3029 def urlencode_postdata(*args, **kargs):
3030 return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3031
3032
3033 def update_url_query(url, query):
3034 if not query:
3035 return url
3036 parsed_url = urllib.parse.urlparse(url)
3037 qs = urllib.parse.parse_qs(parsed_url.query)
3038 qs.update(query)
3039 return urllib.parse.urlunparse(parsed_url._replace(
3040 query=urllib.parse.urlencode(qs, True)))
3041
3042
3043 def update_Request(req, url=None, data=None, headers=None, query=None):
3044 req_headers = req.headers.copy()
3045 req_headers.update(headers or {})
3046 req_data = data or req.data
3047 req_url = update_url_query(url or req.get_full_url(), query)
3048 req_get_method = req.get_method()
3049 if req_get_method == 'HEAD':
3050 req_type = HEADRequest
3051 elif req_get_method == 'PUT':
3052 req_type = PUTRequest
3053 else:
3054 req_type = urllib.request.Request
3055 new_req = req_type(
3056 req_url, data=req_data, headers=req_headers,
3057 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3058 if hasattr(req, 'timeout'):
3059 new_req.timeout = req.timeout
3060 return new_req
3061
3062
3063 def _multipart_encode_impl(data, boundary):
3064 content_type = 'multipart/form-data; boundary=%s' % boundary
3065
3066 out = b''
3067 for k, v in data.items():
3068 out += b'--' + boundary.encode('ascii') + b'\r\n'
3069 if isinstance(k, str):
3070 k = k.encode()
3071 if isinstance(v, str):
3072 v = v.encode()
3073 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3074 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3075 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3076 if boundary.encode('ascii') in content:
3077 raise ValueError('Boundary overlaps with data')
3078 out += content
3079
3080 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3081
3082 return out, content_type
3083
3084
3085 def multipart_encode(data, boundary=None):
3086 '''
3087 Encode a dict to RFC 7578-compliant form-data
3088
3089 data:
3090 A dict where keys and values can be either Unicode or bytes-like
3091 objects.
3092 boundary:
        If specified, it must be a Unicode object and is used as the
        boundary. Otherwise a random boundary is generated.
3095
3096 Reference: https://tools.ietf.org/html/rfc7578
3097 '''
3098 has_specified_boundary = boundary is not None
3099
3100 while True:
3101 if boundary is None:
3102 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3103
3104 try:
3105 out, content_type = _multipart_encode_impl(data, boundary)
3106 break
3107 except ValueError:
3108 if has_specified_boundary:
3109 raise
3110 boundary = None
3111
3112 return out, content_type
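# e.g. with a fixed boundary for reproducibility (hand-checked):
# >>> multipart_encode({b'field': b'value'}, boundary='X')
# (b'--X\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--X--\r\n',
#  'multipart/form-data; boundary=X')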
3113
3114
3115 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3116 for val in map(d.get, variadic(key_or_keys)):
3117 if val is not None and (val or not skip_false_values):
3118 return val
3119 return default
3120
3121
3122 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3123 for f in funcs:
3124 try:
3125 val = f(*args, **kwargs)
3126 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
3127 pass
3128 else:
3129 if expected_type is None or isinstance(val, expected_type):
3130 return val
3131
3132
3133 def try_get(src, getter, expected_type=None):
3134 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
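# e.g. failed lookups are swallowed and yield None:
# >>> try_get({'a': {'b': 42}}, lambda x: x['a']['b'], int)
# 42
# >>> try_get({}, lambda x: x['missing']) is None
# True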
3135
3136
3137 def filter_dict(dct, cndn=lambda _, v: v is not None):
3138 return {k: v for k, v in dct.items() if cndn(k, v)}
3139
3140
3141 def merge_dicts(*dicts):
3142 merged = {}
3143 for a_dict in dicts:
3144 for k, v in a_dict.items():
            if ((v is not None and k not in merged)
                    or (isinstance(v, str) and merged[k] == '')):
3147 merged[k] = v
3148 return merged
3149
3150
3151 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3152 return string if isinstance(string, str) else str(string, encoding, errors)
3153
3154
3155 US_RATINGS = {
3156 'G': 0,
3157 'PG': 10,
3158 'PG-13': 13,
3159 'R': 16,
3160 'NC': 18,
3161 }
3162
3163
3164 TV_PARENTAL_GUIDELINES = {
3165 'TV-Y': 0,
3166 'TV-Y7': 7,
3167 'TV-G': 0,
3168 'TV-PG': 0,
3169 'TV-14': 14,
3170 'TV-MA': 17,
3171 }
3172
3173
3174 def parse_age_limit(s):
3175 # isinstance(False, int) is True. So type() must be used instead
3176 if type(s) is int: # noqa: E721
3177 return s if 0 <= s <= 21 else None
3178 elif not isinstance(s, str):
3179 return None
3180 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3181 if m:
3182 return int(m.group('age'))
3183 s = s.upper()
3184 if s in US_RATINGS:
3185 return US_RATINGS[s]
3186 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3187 if m:
3188 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3189 return None
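# Examples (hand-checked against the tables above):
# >>> parse_age_limit('18+')
# 18
# >>> parse_age_limit('PG-13')
# 13
# >>> parse_age_limit('TV-MA')
# 17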
3190
3191
3192 def strip_jsonp(code):
3193 return re.sub(
3194 r'''(?sx)^
3195 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3196 (?:\s*&&\s*(?P=func_name))?
3197 \s*\(\s*(?P<callback_data>.*)\);?
3198 \s*?(?://[^\n]*)*$''',
3199 r'\g<callback_data>', code)
3200
3201
3202 def js_to_json(code, vars={}):
3203 # vars is a dict of var, val pairs to substitute
3204 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3205 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3206 INTEGER_TABLE = (
3207 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3208 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3209 )
3210
3211 def fix_kv(m):
3212 v = m.group(0)
3213 if v in ('true', 'false', 'null'):
3214 return v
3215 elif v in ('undefined', 'void 0'):
3216 return 'null'
3217 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3218 return ""
3219
3220 if v[0] in ("'", '"'):
3221 v = re.sub(r'(?s)\\.|"', lambda m: {
3222 '"': '\\"',
3223 "\\'": "'",
3224 '\\\n': '',
3225 '\\x': '\\u00',
3226 }.get(m.group(0), m.group(0)), v[1:-1])
3227 else:
3228 for regex, base in INTEGER_TABLE:
3229 im = re.match(regex, v)
3230 if im:
3231 i = int(im.group(1), base)
3232 return '"%d":' % i if v.endswith(':') else '%d' % i
3233
3234 if v in vars:
3235 return vars[v]
3236
3237 return '"%s"' % v
3238
3239 def create_map(mobj):
3240 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3241
3242 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3243 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3244
3245 return re.sub(r'''(?sx)
3246 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3247 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3248 {comment}|,(?={skip}[\]}}])|
3249 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3250 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3251 [0-9]+(?={skip}:)|
3252 !+
3253 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
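# e.g. bare keys get quoted and JS-only literals are normalized (hand-checked):
# >>> js_to_json("{a: 1, b: 'c', d: undefined}")
# '{"a": 1, "b": "c", "d": null}'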
3254
3255
3256 def qualities(quality_ids):
3257 """ Get a numeric quality value out of a list of possible values """
3258 def q(qid):
3259 try:
3260 return quality_ids.index(qid)
3261 except ValueError:
3262 return -1
3263 return q
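# e.g.:
# >>> q = qualities(['144p', '480p', '1080p'])
# >>> q('480p'), q('unknown')
# (1, -1)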
3264
3265
3266 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3267
3268
3269 DEFAULT_OUTTMPL = {
3270 'default': '%(title)s [%(id)s].%(ext)s',
3271 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3272 }
3273 OUTTMPL_TYPES = {
3274 'chapter': None,
3275 'subtitle': None,
3276 'thumbnail': None,
3277 'description': 'description',
3278 'annotation': 'annotations.xml',
3279 'infojson': 'info.json',
3280 'link': None,
3281 'pl_video': None,
3282 'pl_thumbnail': None,
3283 'pl_description': 'description',
3284 'pl_infojson': 'info.json',
3285 }
3286
# Per [1], the %-format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
3290 STR_FORMAT_RE_TMPL = r'''(?x)
3291 (?<!%)(?P<prefix>(?:%%)*)
3292 %
3293 (?P<has_key>\((?P<key>{0})\))?
3294 (?P<format>
3295 (?P<conversion>[#0\-+ ]+)?
3296 (?P<min_width>\d+)?
3297 (?P<precision>\.\d+)?
3298 (?P<len_mod>[hlL])? # unused in python
3299 {1} # conversion type
3300 )
3301 '''
3302
3303
3304 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3305
3306
3307 def limit_length(s, length):
3308 """ Add ellipses to overly long strings """
3309 if s is None:
3310 return None
3311 ELLIPSES = '...'
3312 if len(s) > length:
3313 return s[:length - len(ELLIPSES)] + ELLIPSES
3314 return s
3315
3316
3317 def version_tuple(v):
3318 return tuple(int(e) for e in re.split(r'[-.]', v))
3319
3320
3321 def is_outdated_version(version, limit, assume_new=True):
3322 if not version:
3323 return not assume_new
3324 try:
3325 return version_tuple(version) < version_tuple(limit)
3326 except ValueError:
3327 return not assume_new
3328
3329
3330 def ytdl_is_updateable():
3331 """ Returns if yt-dlp can be updated with -U """
3332
3333 from .update import is_non_updateable
3334
3335 return not is_non_updateable()
3336
3337
3338 def args_to_str(args):
3339 # Get a short string representation for a subprocess command
3340 return ' '.join(compat_shlex_quote(a) for a in args)
3341
3342
3343 def error_to_compat_str(err):
3344 return str(err)
3345
3346
3347 def error_to_str(err):
3348 return f'{type(err).__name__}: {err}'
3349
3350
3351 def mimetype2ext(mt):
3352 if mt is None:
3353 return None
3354
3355 mt, _, params = mt.partition(';')
3356 mt = mt.strip()
3357
3358 FULL_MAP = {
3359 'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
        # Use .mp3 here, as it is the most popular one
3362 'audio/mpeg': 'mp3',
3363 'audio/x-wav': 'wav',
3364 'audio/wav': 'wav',
3365 'audio/wave': 'wav',
3366 }
3367
3368 ext = FULL_MAP.get(mt)
3369 if ext is not None:
3370 return ext
3371
3372 SUBTYPE_MAP = {
3373 '3gpp': '3gp',
3374 'smptett+xml': 'tt',
3375 'ttaf+xml': 'dfxp',
3376 'ttml+xml': 'ttml',
3377 'x-flv': 'flv',
3378 'x-mp4-fragmented': 'mp4',
3379 'x-ms-sami': 'sami',
3380 'x-ms-wmv': 'wmv',
3381 'mpegurl': 'm3u8',
3382 'x-mpegurl': 'm3u8',
3383 'vnd.apple.mpegurl': 'm3u8',
3384 'dash+xml': 'mpd',
3385 'f4m+xml': 'f4m',
3386 'hds+xml': 'f4m',
3387 'vnd.ms-sstr+xml': 'ism',
3388 'quicktime': 'mov',
3389 'mp2t': 'ts',
3390 'x-wav': 'wav',
3391 'filmstrip+json': 'fs',
3392 'svg+xml': 'svg',
3393 }
3394
3395 _, _, subtype = mt.rpartition('/')
3396 ext = SUBTYPE_MAP.get(subtype.lower())
3397 if ext is not None:
3398 return ext
3399
3400 SUFFIX_MAP = {
3401 'json': 'json',
3402 'xml': 'xml',
3403 'zip': 'zip',
3404 'gzip': 'gz',
3405 }
3406
3407 _, _, suffix = subtype.partition('+')
3408 ext = SUFFIX_MAP.get(suffix)
3409 if ext is not None:
3410 return ext
3411
3412 return subtype.replace('+', '.')
3413
3414
3415 def ext2mimetype(ext_or_url):
3416 if not ext_or_url:
3417 return None
3418 if '.' not in ext_or_url:
3419 ext_or_url = f'file.{ext_or_url}'
3420 return mimetypes.guess_type(ext_or_url)[0]
3421
3422
3423 def parse_codecs(codecs_str):
3424 # http://tools.ietf.org/html/rfc6381
3425 if not codecs_str:
3426 return {}
3427 split_codecs = list(filter(None, map(
3428 str.strip, codecs_str.strip().strip(',').split(','))))
3429 vcodec, acodec, scodec, hdr = None, None, None, None
3430 for full_codec in split_codecs:
3431 parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3432 if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3433 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3434 if vcodec:
3435 continue
3436 vcodec = full_codec
3437 if parts[0] in ('dvh1', 'dvhe'):
3438 hdr = 'DV'
3439 elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
3440 hdr = 'HDR10'
3441 elif parts[:2] == ['vp9', '2']:
3442 hdr = 'HDR10'
3443 elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
3444 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3445 acodec = acodec or full_codec
3446 elif parts[0] in ('stpp', 'wvtt'):
3447 scodec = scodec or full_codec
3448 else:
3449 write_string(f'WARNING: Unknown codec {full_codec}\n')
3450 if vcodec or acodec or scodec:
3451 return {
3452 'vcodec': vcodec or 'none',
3453 'acodec': acodec or 'none',
3454 'dynamic_range': hdr,
3455 **({'scodec': scodec} if scodec is not None else {}),
3456 }
3457 elif len(split_codecs) == 2:
3458 return {
3459 'vcodec': split_codecs[0],
3460 'acodec': split_codecs[1],
3461 }
3462 return {}
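# Example (matches the project's test suite):
# >>> parse_codecs('avc1.77.30, mp4a.40.2')
# {'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', 'dynamic_range': None}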
3463
3464
3465 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3466 assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3467
3468 allow_mkv = not preferences or 'mkv' in preferences
3469
3470 if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: does any other format allow this?
3472
    # TODO: Not all codecs supported by parse_codecs are handled here
3474 COMPATIBLE_CODECS = {
3475 'mp4': {
3476 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
3477 'h264', 'aacl', # Set in ISM
3478 },
3479 'webm': {
3480 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3481 'vp9x', 'vp8x', # in the webm spec
3482 },
3483 }
3484
3485 sanitize_codec = functools.partial(try_get, getter=lambda x: x.split('.')[0].replace('0', ''))
3486 vcodec, acodec = sanitize_codec(vcodecs[0]), sanitize_codec(acodecs[0])
3487
3488 for ext in preferences or COMPATIBLE_CODECS.keys():
3489 codec_set = COMPATIBLE_CODECS.get(ext, set())
3490 if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3491 return ext
3492
3493 COMPATIBLE_EXTS = (
3494 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3495 {'webm'},
3496 )
3497 for ext in preferences or vexts:
3498 current_exts = {ext, *vexts, *aexts}
3499 if ext == 'mkv' or current_exts == {ext} or any(
3500 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3501 return ext
3502 return 'mkv' if allow_mkv else preferences[-1]
3503
3504
3505 def urlhandle_detect_ext(url_handle):
3506 getheader = url_handle.headers.get
3507
3508 cd = getheader('Content-Disposition')
3509 if cd:
3510 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3511 if m:
3512 e = determine_ext(m.group('filename'), default_ext=None)
3513 if e:
3514 return e
3515
3516 return mimetype2ext(getheader('Content-Type'))
3517
3518
3519 def encode_data_uri(data, mime_type):
3520 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3521
3522
3523 def age_restricted(content_limit, age_limit):
3524 """ Returns True iff the content should be blocked """
3525
3526 if age_limit is None: # No limit set
3527 return False
3528 if content_limit is None:
3529 return False # Content available for everyone
3530 return age_limit < content_limit
3531
3532
3533 # List of known byte-order-marks (BOM)
3534 BOMS = [
3535 (b'\xef\xbb\xbf', 'utf-8'),
3536 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3537 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3538 (b'\xff\xfe', 'utf-16-le'),
3539 (b'\xfe\xff', 'utf-16-be'),
3540 ]
3541
3542
3543 def is_html(first_bytes):
3544 """ Detect whether a file contains HTML by examining its first bytes. """
3545
3546 encoding = 'utf-8'
3547 for bom, enc in BOMS:
3548 while first_bytes.startswith(bom):
3549 encoding, first_bytes = enc, first_bytes[len(bom):]
3550
3551 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3552
3553
3554 def determine_protocol(info_dict):
3555 protocol = info_dict.get('protocol')
3556 if protocol is not None:
3557 return protocol
3558
3559 url = sanitize_url(info_dict['url'])
3560 if url.startswith('rtmp'):
3561 return 'rtmp'
3562 elif url.startswith('mms'):
3563 return 'mms'
3564 elif url.startswith('rtsp'):
3565 return 'rtsp'
3566
3567 ext = determine_ext(url)
3568 if ext == 'm3u8':
3569 return 'm3u8'
3570 elif ext == 'f4m':
3571 return 'f4m'
3572
3573 return urllib.parse.urlparse(url).scheme
3574
3575
3576 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3577 """ Render a list of rows, each as a list of values.
    Text after a \\t will be right aligned """
3579 def width(string):
3580 return len(remove_terminal_sequences(string).replace('\t', ''))
3581
3582 def get_max_lens(table):
3583 return [max(width(str(v)) for v in col) for col in zip(*table)]
3584
3585 def filter_using_list(row, filterArray):
3586 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3587
3588 max_lens = get_max_lens(data) if hide_empty else []
3589 header_row = filter_using_list(header_row, max_lens)
3590 data = [filter_using_list(row, max_lens) for row in data]
3591
3592 table = [header_row] + data
3593 max_lens = get_max_lens(table)
3594 extra_gap += 1
3595 if delim:
3596 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3597 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3598 for row in table:
3599 for pos, text in enumerate(map(str, row)):
3600 if '\t' in text:
3601 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3602 else:
3603 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3604 ret = '\n'.join(''.join(row).rstrip() for row in table)
3605 return ret
3606
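# Illustrative example of the expected output:
#   >>> print(render_table(['ID', 'NAME'], [['1', 'foo'], ['2', 'bar']]))
#   ID NAME
#   1  foo
#   2  bar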
3607
3608 def _match_one(filter_part, dct, incomplete):
3609 # TODO: Generalize code with YoutubeDL._build_format_filter
3610 STRING_OPERATORS = {
3611 '*=': operator.contains,
3612 '^=': lambda attr, value: attr.startswith(value),
3613 '$=': lambda attr, value: attr.endswith(value),
3614 '~=': lambda attr, value: re.search(value, attr),
3615 }
3616 COMPARISON_OPERATORS = {
3617 **STRING_OPERATORS,
3618 '<=': operator.le, # "<=" must be defined above "<"
3619 '<': operator.lt,
3620 '>=': operator.ge,
3621 '>': operator.gt,
3622 '=': operator.eq,
3623 }
3624
3625 if isinstance(incomplete, bool):
3626 is_incomplete = lambda _: incomplete
3627 else:
3628 is_incomplete = lambda k: k in incomplete
3629
3630 operator_rex = re.compile(r'''(?x)
3631 (?P<key>[a-z_]+)
3632 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3633 (?:
3634 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3635 (?P<strval>.+?)
3636 )
3637 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3638 m = operator_rex.fullmatch(filter_part.strip())
3639 if m:
3640 m = m.groupdict()
3641 unnegated_op = COMPARISON_OPERATORS[m['op']]
3642 if m['negation']:
3643 op = lambda attr, value: not unnegated_op(attr, value)
3644 else:
3645 op = unnegated_op
3646 comparison_value = m['quotedstrval'] or m['strval']
3647 if m['quote']:
3648 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3649 actual_value = dct.get(m['key'])
3650 numeric_comparison = None
3651 if isinstance(actual_value, (int, float)):
3652 # If the original field is a string and the matching comparison value is
3653 # a number, we should respect the origin of the original field
3654 # and process the comparison value as a string (see
3655 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3656 try:
3657 numeric_comparison = int(comparison_value)
3658 except ValueError:
3659 numeric_comparison = parse_filesize(comparison_value)
3660 if numeric_comparison is None:
3661 numeric_comparison = parse_filesize(f'{comparison_value}B')
3662 if numeric_comparison is None:
3663 numeric_comparison = parse_duration(comparison_value)
3664 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3665 raise ValueError('Operator %s only supports string values!' % m['op'])
3666 if actual_value is None:
3667 return is_incomplete(m['key']) or m['none_inclusive']
3668 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3669
3670 UNARY_OPERATORS = {
3671 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3672 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3673 }
3674 operator_rex = re.compile(r'''(?x)
3675 (?P<op>%s)\s*(?P<key>[a-z_]+)
3676 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3677 m = operator_rex.fullmatch(filter_part.strip())
3678 if m:
3679 op = UNARY_OPERATORS[m.group('op')]
3680 actual_value = dct.get(m.group('key'))
3681 if is_incomplete(m.group('key')) and actual_value is None:
3682 return True
3683 return op(actual_value)
3684
3685 raise ValueError('Invalid filter part %r' % filter_part)
3686
3687
3688 def match_str(filter_str, dct, incomplete=False):
3689 """ Filter a dictionary with a simple string syntax.
3690 @returns Whether the filter passes
3691 @param incomplete Set of keys that are expected to be missing from dct.
3692 Can be True/False to indicate that all/none of the keys may be missing.
3693 All conditions on incomplete keys pass if the key is missing.
3694 """
3695 return all(
3696 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3697 for filter_part in re.split(r'(?<!\\)&', filter_str))
3698
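# Illustrative example: numeric fields compare numerically, and the '?'
# suffix lets a condition pass when the key is missing from dct:
#   >>> match_str('like_count >? 100 & duration < 600', {'duration': 300})
#   True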
3699
3700 def match_filter_func(filters):
3701 if not filters:
3702 return None
3703 filters = set(variadic(filters))
3704
3705 interactive = '-' in filters
3706 if interactive:
3707 filters.remove('-')
3708
3709 def _match_func(info_dict, incomplete=False):
3710 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3711 return NO_DEFAULT if interactive and not incomplete else None
3712 else:
3713 video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3714 filter_str = ') | ('.join(map(str.strip, filters))
3715 return f'{video_title} does not pass filter ({filter_str}), skipping ...'
3716 return _match_func
3717
3718
3719 class download_range_func:
3720 def __init__(self, chapters, ranges):
3721 self.chapters, self.ranges = chapters, ranges
3722
3723 def __call__(self, info_dict, ydl):
3724 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3725 else 'Cannot match chapters since chapter information is unavailable')
3726 for regex in self.chapters or []:
3727 for i, chapter in enumerate(info_dict.get('chapters') or []):
3728 if re.search(regex, chapter['title']):
3729 warning = None
3730 yield {**chapter, 'index': i}
3731 if self.chapters and warning:
3732 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3733
3734 yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3735
3736 def __eq__(self, other):
3737 return (isinstance(other, download_range_func)
3738 and self.chapters == other.chapters and self.ranges == other.ranges)
3739
3740
3741 def parse_dfxp_time_expr(time_expr):
3742 if not time_expr:
3743 return
3744
3745 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3746 if mobj:
3747 return float(mobj.group('time_offset'))
3748
3749 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3750 if mobj:
3751 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3752
3753
3754 def srt_subtitles_timecode(seconds):
3755 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3756
3757
3758 def ass_subtitles_timecode(seconds):
3759 time = timetuple_from_msec(seconds * 1000)
3760 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3761
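# Illustrative examples of the subtitle time helpers:
#   >>> parse_dfxp_time_expr('00:01:02.5')
#   62.5
#   >>> srt_subtitles_timecode(61.5)
#   '00:01:01,500'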
3762
3763 def dfxp2srt(dfxp_data):
3764 '''
3765 @param dfxp_data A bytes-like object containing DFXP data
3766 @returns A unicode object containing converted SRT data
3767 '''
3768 LEGACY_NAMESPACES = (
3769 (b'http://www.w3.org/ns/ttml', [
3770 b'http://www.w3.org/2004/11/ttaf1',
3771 b'http://www.w3.org/2006/04/ttaf1',
3772 b'http://www.w3.org/2006/10/ttaf1',
3773 ]),
3774 (b'http://www.w3.org/ns/ttml#styling', [
3775 b'http://www.w3.org/ns/ttml#style',
3776 ]),
3777 )
3778
3779 SUPPORTED_STYLING = [
3780 'color',
3781 'fontFamily',
3782 'fontSize',
3783 'fontStyle',
3784 'fontWeight',
3785 'textDecoration'
3786 ]
3787
3788 _x = functools.partial(xpath_with_ns, ns_map={
3789 'xml': 'http://www.w3.org/XML/1998/namespace',
3790 'ttml': 'http://www.w3.org/ns/ttml',
3791 'tts': 'http://www.w3.org/ns/ttml#styling',
3792 })
3793
3794 styles = {}
3795 default_style = {}
3796
3797 class TTMLPElementParser:
3798 _out = ''
3799 _unclosed_elements = []
3800 _applied_styles = []
3801
3802 def start(self, tag, attrib):
3803 if tag in (_x('ttml:br'), 'br'):
3804 self._out += '\n'
3805 else:
3806 unclosed_elements = []
3807 style = {}
3808 element_style_id = attrib.get('style')
3809 if default_style:
3810 style.update(default_style)
3811 if element_style_id:
3812 style.update(styles.get(element_style_id, {}))
3813 for prop in SUPPORTED_STYLING:
3814 prop_val = attrib.get(_x('tts:' + prop))
3815 if prop_val:
3816 style[prop] = prop_val
3817 if style:
3818 font = ''
3819 for k, v in sorted(style.items()):
3820 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3821 continue
3822 if k == 'color':
3823 font += ' color="%s"' % v
3824 elif k == 'fontSize':
3825 font += ' size="%s"' % v
3826 elif k == 'fontFamily':
3827 font += ' face="%s"' % v
3828 elif k == 'fontWeight' and v == 'bold':
3829 self._out += '<b>'
3830 unclosed_elements.append('b')
3831 elif k == 'fontStyle' and v == 'italic':
3832 self._out += '<i>'
3833 unclosed_elements.append('i')
3834 elif k == 'textDecoration' and v == 'underline':
3835 self._out += '<u>'
3836 unclosed_elements.append('u')
3837 if font:
3838 self._out += '<font' + font + '>'
3839 unclosed_elements.append('font')
3840 applied_style = {}
3841 if self._applied_styles:
3842 applied_style.update(self._applied_styles[-1])
3843 applied_style.update(style)
3844 self._applied_styles.append(applied_style)
3845 self._unclosed_elements.append(unclosed_elements)
3846
3847 def end(self, tag):
3848 if tag not in (_x('ttml:br'), 'br'):
3849 unclosed_elements = self._unclosed_elements.pop()
3850 for element in reversed(unclosed_elements):
3851 self._out += '</%s>' % element
3852 if unclosed_elements and self._applied_styles:
3853 self._applied_styles.pop()
3854
3855 def data(self, data):
3856 self._out += data
3857
3858 def close(self):
3859 return self._out.strip()
3860
3861 def parse_node(node):
3862 target = TTMLPElementParser()
3863 parser = xml.etree.ElementTree.XMLParser(target=target)
3864 parser.feed(xml.etree.ElementTree.tostring(node))
3865 return parser.close()
3866
3867 for k, v in LEGACY_NAMESPACES:
3868 for ns in v:
3869 dfxp_data = dfxp_data.replace(ns, k)
3870
3871 dfxp = compat_etree_fromstring(dfxp_data)
3872 out = []
3873 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3874
3875 if not paras:
3876 raise ValueError('Invalid dfxp/TTML subtitle')
3877
3878 repeat = False
3879 while True:
3880 for style in dfxp.findall(_x('.//ttml:style')):
3881 style_id = style.get('id') or style.get(_x('xml:id'))
3882 if not style_id:
3883 continue
3884 parent_style_id = style.get('style')
3885 if parent_style_id:
3886 if parent_style_id not in styles:
3887 repeat = True
3888 continue
3889 styles[style_id] = styles[parent_style_id].copy()
3890 for prop in SUPPORTED_STYLING:
3891 prop_val = style.get(_x('tts:' + prop))
3892 if prop_val:
3893 styles.setdefault(style_id, {})[prop] = prop_val
3894 if repeat:
3895 repeat = False
3896 else:
3897 break
3898
3899 for p in ('body', 'div'):
3900 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3901 if ele is None:
3902 continue
3903 style = styles.get(ele.get('style'))
3904 if not style:
3905 continue
3906 default_style.update(style)
3907
3908 for para, index in zip(paras, itertools.count(1)):
3909 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3910 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3911 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3912 if begin_time is None:
3913 continue
3914 if not end_time:
3915 if not dur:
3916 continue
3917 end_time = begin_time + dur
3918 out.append('%d\n%s --> %s\n%s\n\n' % (
3919 index,
3920 srt_subtitles_timecode(begin_time),
3921 srt_subtitles_timecode(end_time),
3922 parse_node(para)))
3923
3924 return ''.join(out)
3925
3926
3927 def cli_option(params, command_option, param, separator=None):
3928 param = params.get(param)
3929 return ([] if param is None
3930 else [command_option, str(param)] if separator is None
3931 else [f'{command_option}{separator}{param}'])
3932
3933
3934 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3935 param = params.get(param)
3936 assert param in (True, False, None)
3937 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3938
3939
3940 def cli_valueless_option(params, command_option, param, expected_value=True):
3941 return [command_option] if params.get(param) == expected_value else []
3942
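# Illustrative examples of the cli_* helpers (parameter names are placeholders):
#   >>> cli_option({'proxy': 'http://127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', 'http://127.0.0.1:3128']
#   >>> cli_bool_option({'check': True}, '--check-certificate', 'check')
#   ['--check-certificate', 'true']
#   >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#   ['--quiet']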
3943
3944 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3945 if isinstance(argdict, (list, tuple)): # for backward compatibility
3946 if use_compat:
3947 return argdict
3948 else:
3949 argdict = None
3950 if argdict is None:
3951 return default
3952 assert isinstance(argdict, dict)
3953
3954 assert isinstance(keys, (list, tuple))
3955 for key_list in keys:
3956 arg_list = list(filter(
3957 lambda x: x is not None,
3958 [argdict.get(key.lower()) for key in variadic(key_list)]))
3959 if arg_list:
3960 return [arg for args in arg_list for arg in args]
3961 return default
3962
3963
3964 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3965 main_key, exe = main_key.lower(), exe.lower()
3966 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3967 keys = [f'{root_key}{k}' for k in (keys or [''])]
3968 if root_key in keys:
3969 if main_key != exe:
3970 keys.append((main_key, exe))
3971 keys.append('default')
3972 else:
3973 use_compat = False
3974 return cli_configuration_args(argdict, keys, default, use_compat)
3975
3976
3977 class ISO639Utils:
3978 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3979 _lang_map = {
3980 'aa': 'aar',
3981 'ab': 'abk',
3982 'ae': 'ave',
3983 'af': 'afr',
3984 'ak': 'aka',
3985 'am': 'amh',
3986 'an': 'arg',
3987 'ar': 'ara',
3988 'as': 'asm',
3989 'av': 'ava',
3990 'ay': 'aym',
3991 'az': 'aze',
3992 'ba': 'bak',
3993 'be': 'bel',
3994 'bg': 'bul',
3995 'bh': 'bih',
3996 'bi': 'bis',
3997 'bm': 'bam',
3998 'bn': 'ben',
3999 'bo': 'bod',
4000 'br': 'bre',
4001 'bs': 'bos',
4002 'ca': 'cat',
4003 'ce': 'che',
4004 'ch': 'cha',
4005 'co': 'cos',
4006 'cr': 'cre',
4007 'cs': 'ces',
4008 'cu': 'chu',
4009 'cv': 'chv',
4010 'cy': 'cym',
4011 'da': 'dan',
4012 'de': 'deu',
4013 'dv': 'div',
4014 'dz': 'dzo',
4015 'ee': 'ewe',
4016 'el': 'ell',
4017 'en': 'eng',
4018 'eo': 'epo',
4019 'es': 'spa',
4020 'et': 'est',
4021 'eu': 'eus',
4022 'fa': 'fas',
4023 'ff': 'ful',
4024 'fi': 'fin',
4025 'fj': 'fij',
4026 'fo': 'fao',
4027 'fr': 'fra',
4028 'fy': 'fry',
4029 'ga': 'gle',
4030 'gd': 'gla',
4031 'gl': 'glg',
4032 'gn': 'grn',
4033 'gu': 'guj',
4034 'gv': 'glv',
4035 'ha': 'hau',
4036 'he': 'heb',
4037 'iw': 'heb', # Replaced by he in 1989 revision
4038 'hi': 'hin',
4039 'ho': 'hmo',
4040 'hr': 'hrv',
4041 'ht': 'hat',
4042 'hu': 'hun',
4043 'hy': 'hye',
4044 'hz': 'her',
4045 'ia': 'ina',
4046 'id': 'ind',
4047 'in': 'ind', # Replaced by id in 1989 revision
4048 'ie': 'ile',
4049 'ig': 'ibo',
4050 'ii': 'iii',
4051 'ik': 'ipk',
4052 'io': 'ido',
4053 'is': 'isl',
4054 'it': 'ita',
4055 'iu': 'iku',
4056 'ja': 'jpn',
4057 'jv': 'jav',
4058 'ka': 'kat',
4059 'kg': 'kon',
4060 'ki': 'kik',
4061 'kj': 'kua',
4062 'kk': 'kaz',
4063 'kl': 'kal',
4064 'km': 'khm',
4065 'kn': 'kan',
4066 'ko': 'kor',
4067 'kr': 'kau',
4068 'ks': 'kas',
4069 'ku': 'kur',
4070 'kv': 'kom',
4071 'kw': 'cor',
4072 'ky': 'kir',
4073 'la': 'lat',
4074 'lb': 'ltz',
4075 'lg': 'lug',
4076 'li': 'lim',
4077 'ln': 'lin',
4078 'lo': 'lao',
4079 'lt': 'lit',
4080 'lu': 'lub',
4081 'lv': 'lav',
4082 'mg': 'mlg',
4083 'mh': 'mah',
4084 'mi': 'mri',
4085 'mk': 'mkd',
4086 'ml': 'mal',
4087 'mn': 'mon',
4088 'mr': 'mar',
4089 'ms': 'msa',
4090 'mt': 'mlt',
4091 'my': 'mya',
4092 'na': 'nau',
4093 'nb': 'nob',
4094 'nd': 'nde',
4095 'ne': 'nep',
4096 'ng': 'ndo',
4097 'nl': 'nld',
4098 'nn': 'nno',
4099 'no': 'nor',
4100 'nr': 'nbl',
4101 'nv': 'nav',
4102 'ny': 'nya',
4103 'oc': 'oci',
4104 'oj': 'oji',
4105 'om': 'orm',
4106 'or': 'ori',
4107 'os': 'oss',
4108 'pa': 'pan',
4109 'pi': 'pli',
4110 'pl': 'pol',
4111 'ps': 'pus',
4112 'pt': 'por',
4113 'qu': 'que',
4114 'rm': 'roh',
4115 'rn': 'run',
4116 'ro': 'ron',
4117 'ru': 'rus',
4118 'rw': 'kin',
4119 'sa': 'san',
4120 'sc': 'srd',
4121 'sd': 'snd',
4122 'se': 'sme',
4123 'sg': 'sag',
4124 'si': 'sin',
4125 'sk': 'slk',
4126 'sl': 'slv',
4127 'sm': 'smo',
4128 'sn': 'sna',
4129 'so': 'som',
4130 'sq': 'sqi',
4131 'sr': 'srp',
4132 'ss': 'ssw',
4133 'st': 'sot',
4134 'su': 'sun',
4135 'sv': 'swe',
4136 'sw': 'swa',
4137 'ta': 'tam',
4138 'te': 'tel',
4139 'tg': 'tgk',
4140 'th': 'tha',
4141 'ti': 'tir',
4142 'tk': 'tuk',
4143 'tl': 'tgl',
4144 'tn': 'tsn',
4145 'to': 'ton',
4146 'tr': 'tur',
4147 'ts': 'tso',
4148 'tt': 'tat',
4149 'tw': 'twi',
4150 'ty': 'tah',
4151 'ug': 'uig',
4152 'uk': 'ukr',
4153 'ur': 'urd',
4154 'uz': 'uzb',
4155 've': 'ven',
4156 'vi': 'vie',
4157 'vo': 'vol',
4158 'wa': 'wln',
4159 'wo': 'wol',
4160 'xh': 'xho',
4161 'yi': 'yid',
4162 'ji': 'yid', # Replaced by yi in 1989 revision
4163 'yo': 'yor',
4164 'za': 'zha',
4165 'zh': 'zho',
4166 'zu': 'zul',
4167 }
4168
4169 @classmethod
4170 def short2long(cls, code):
4171 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4172 return cls._lang_map.get(code[:2])
4173
4174 @classmethod
4175 def long2short(cls, code):
4176 """Convert language code from ISO 639-2/T to ISO 639-1"""
4177 for short_name, long_name in cls._lang_map.items():
4178 if long_name == code:
4179 return short_name
4180
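# Illustrative examples:
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'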
4181
4182 class ISO3166Utils:
4183 # From http://data.okfn.org/data/core/country-list
4184 _country_map = {
4185 'AF': 'Afghanistan',
4186 'AX': 'Åland Islands',
4187 'AL': 'Albania',
4188 'DZ': 'Algeria',
4189 'AS': 'American Samoa',
4190 'AD': 'Andorra',
4191 'AO': 'Angola',
4192 'AI': 'Anguilla',
4193 'AQ': 'Antarctica',
4194 'AG': 'Antigua and Barbuda',
4195 'AR': 'Argentina',
4196 'AM': 'Armenia',
4197 'AW': 'Aruba',
4198 'AU': 'Australia',
4199 'AT': 'Austria',
4200 'AZ': 'Azerbaijan',
4201 'BS': 'Bahamas',
4202 'BH': 'Bahrain',
4203 'BD': 'Bangladesh',
4204 'BB': 'Barbados',
4205 'BY': 'Belarus',
4206 'BE': 'Belgium',
4207 'BZ': 'Belize',
4208 'BJ': 'Benin',
4209 'BM': 'Bermuda',
4210 'BT': 'Bhutan',
4211 'BO': 'Bolivia, Plurinational State of',
4212 'BQ': 'Bonaire, Sint Eustatius and Saba',
4213 'BA': 'Bosnia and Herzegovina',
4214 'BW': 'Botswana',
4215 'BV': 'Bouvet Island',
4216 'BR': 'Brazil',
4217 'IO': 'British Indian Ocean Territory',
4218 'BN': 'Brunei Darussalam',
4219 'BG': 'Bulgaria',
4220 'BF': 'Burkina Faso',
4221 'BI': 'Burundi',
4222 'KH': 'Cambodia',
4223 'CM': 'Cameroon',
4224 'CA': 'Canada',
4225 'CV': 'Cape Verde',
4226 'KY': 'Cayman Islands',
4227 'CF': 'Central African Republic',
4228 'TD': 'Chad',
4229 'CL': 'Chile',
4230 'CN': 'China',
4231 'CX': 'Christmas Island',
4232 'CC': 'Cocos (Keeling) Islands',
4233 'CO': 'Colombia',
4234 'KM': 'Comoros',
4235 'CG': 'Congo',
4236 'CD': 'Congo, the Democratic Republic of the',
4237 'CK': 'Cook Islands',
4238 'CR': 'Costa Rica',
4239 'CI': 'Côte d\'Ivoire',
4240 'HR': 'Croatia',
4241 'CU': 'Cuba',
4242 'CW': 'Curaçao',
4243 'CY': 'Cyprus',
4244 'CZ': 'Czech Republic',
4245 'DK': 'Denmark',
4246 'DJ': 'Djibouti',
4247 'DM': 'Dominica',
4248 'DO': 'Dominican Republic',
4249 'EC': 'Ecuador',
4250 'EG': 'Egypt',
4251 'SV': 'El Salvador',
4252 'GQ': 'Equatorial Guinea',
4253 'ER': 'Eritrea',
4254 'EE': 'Estonia',
4255 'ET': 'Ethiopia',
4256 'FK': 'Falkland Islands (Malvinas)',
4257 'FO': 'Faroe Islands',
4258 'FJ': 'Fiji',
4259 'FI': 'Finland',
4260 'FR': 'France',
4261 'GF': 'French Guiana',
4262 'PF': 'French Polynesia',
4263 'TF': 'French Southern Territories',
4264 'GA': 'Gabon',
4265 'GM': 'Gambia',
4266 'GE': 'Georgia',
4267 'DE': 'Germany',
4268 'GH': 'Ghana',
4269 'GI': 'Gibraltar',
4270 'GR': 'Greece',
4271 'GL': 'Greenland',
4272 'GD': 'Grenada',
4273 'GP': 'Guadeloupe',
4274 'GU': 'Guam',
4275 'GT': 'Guatemala',
4276 'GG': 'Guernsey',
4277 'GN': 'Guinea',
4278 'GW': 'Guinea-Bissau',
4279 'GY': 'Guyana',
4280 'HT': 'Haiti',
4281 'HM': 'Heard Island and McDonald Islands',
4282 'VA': 'Holy See (Vatican City State)',
4283 'HN': 'Honduras',
4284 'HK': 'Hong Kong',
4285 'HU': 'Hungary',
4286 'IS': 'Iceland',
4287 'IN': 'India',
4288 'ID': 'Indonesia',
4289 'IR': 'Iran, Islamic Republic of',
4290 'IQ': 'Iraq',
4291 'IE': 'Ireland',
4292 'IM': 'Isle of Man',
4293 'IL': 'Israel',
4294 'IT': 'Italy',
4295 'JM': 'Jamaica',
4296 'JP': 'Japan',
4297 'JE': 'Jersey',
4298 'JO': 'Jordan',
4299 'KZ': 'Kazakhstan',
4300 'KE': 'Kenya',
4301 'KI': 'Kiribati',
4302 'KP': 'Korea, Democratic People\'s Republic of',
4303 'KR': 'Korea, Republic of',
4304 'KW': 'Kuwait',
4305 'KG': 'Kyrgyzstan',
4306 'LA': 'Lao People\'s Democratic Republic',
4307 'LV': 'Latvia',
4308 'LB': 'Lebanon',
4309 'LS': 'Lesotho',
4310 'LR': 'Liberia',
4311 'LY': 'Libya',
4312 'LI': 'Liechtenstein',
4313 'LT': 'Lithuania',
4314 'LU': 'Luxembourg',
4315 'MO': 'Macao',
4316 'MK': 'Macedonia, the Former Yugoslav Republic of',
4317 'MG': 'Madagascar',
4318 'MW': 'Malawi',
4319 'MY': 'Malaysia',
4320 'MV': 'Maldives',
4321 'ML': 'Mali',
4322 'MT': 'Malta',
4323 'MH': 'Marshall Islands',
4324 'MQ': 'Martinique',
4325 'MR': 'Mauritania',
4326 'MU': 'Mauritius',
4327 'YT': 'Mayotte',
4328 'MX': 'Mexico',
4329 'FM': 'Micronesia, Federated States of',
4330 'MD': 'Moldova, Republic of',
4331 'MC': 'Monaco',
4332 'MN': 'Mongolia',
4333 'ME': 'Montenegro',
4334 'MS': 'Montserrat',
4335 'MA': 'Morocco',
4336 'MZ': 'Mozambique',
4337 'MM': 'Myanmar',
4338 'NA': 'Namibia',
4339 'NR': 'Nauru',
4340 'NP': 'Nepal',
4341 'NL': 'Netherlands',
4342 'NC': 'New Caledonia',
4343 'NZ': 'New Zealand',
4344 'NI': 'Nicaragua',
4345 'NE': 'Niger',
4346 'NG': 'Nigeria',
4347 'NU': 'Niue',
4348 'NF': 'Norfolk Island',
4349 'MP': 'Northern Mariana Islands',
4350 'NO': 'Norway',
4351 'OM': 'Oman',
4352 'PK': 'Pakistan',
4353 'PW': 'Palau',
4354 'PS': 'Palestine, State of',
4355 'PA': 'Panama',
4356 'PG': 'Papua New Guinea',
4357 'PY': 'Paraguay',
4358 'PE': 'Peru',
4359 'PH': 'Philippines',
4360 'PN': 'Pitcairn',
4361 'PL': 'Poland',
4362 'PT': 'Portugal',
4363 'PR': 'Puerto Rico',
4364 'QA': 'Qatar',
4365 'RE': 'Réunion',
4366 'RO': 'Romania',
4367 'RU': 'Russian Federation',
4368 'RW': 'Rwanda',
4369 'BL': 'Saint Barthélemy',
4370 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4371 'KN': 'Saint Kitts and Nevis',
4372 'LC': 'Saint Lucia',
4373 'MF': 'Saint Martin (French part)',
4374 'PM': 'Saint Pierre and Miquelon',
4375 'VC': 'Saint Vincent and the Grenadines',
4376 'WS': 'Samoa',
4377 'SM': 'San Marino',
4378 'ST': 'Sao Tome and Principe',
4379 'SA': 'Saudi Arabia',
4380 'SN': 'Senegal',
4381 'RS': 'Serbia',
4382 'SC': 'Seychelles',
4383 'SL': 'Sierra Leone',
4384 'SG': 'Singapore',
4385 'SX': 'Sint Maarten (Dutch part)',
4386 'SK': 'Slovakia',
4387 'SI': 'Slovenia',
4388 'SB': 'Solomon Islands',
4389 'SO': 'Somalia',
4390 'ZA': 'South Africa',
4391 'GS': 'South Georgia and the South Sandwich Islands',
4392 'SS': 'South Sudan',
4393 'ES': 'Spain',
4394 'LK': 'Sri Lanka',
4395 'SD': 'Sudan',
4396 'SR': 'Suriname',
4397 'SJ': 'Svalbard and Jan Mayen',
4398 'SZ': 'Swaziland',
4399 'SE': 'Sweden',
4400 'CH': 'Switzerland',
4401 'SY': 'Syrian Arab Republic',
4402 'TW': 'Taiwan, Province of China',
4403 'TJ': 'Tajikistan',
4404 'TZ': 'Tanzania, United Republic of',
4405 'TH': 'Thailand',
4406 'TL': 'Timor-Leste',
4407 'TG': 'Togo',
4408 'TK': 'Tokelau',
4409 'TO': 'Tonga',
4410 'TT': 'Trinidad and Tobago',
4411 'TN': 'Tunisia',
4412 'TR': 'Turkey',
4413 'TM': 'Turkmenistan',
4414 'TC': 'Turks and Caicos Islands',
4415 'TV': 'Tuvalu',
4416 'UG': 'Uganda',
4417 'UA': 'Ukraine',
4418 'AE': 'United Arab Emirates',
4419 'GB': 'United Kingdom',
4420 'US': 'United States',
4421 'UM': 'United States Minor Outlying Islands',
4422 'UY': 'Uruguay',
4423 'UZ': 'Uzbekistan',
4424 'VU': 'Vanuatu',
4425 'VE': 'Venezuela, Bolivarian Republic of',
4426 'VN': 'Viet Nam',
4427 'VG': 'Virgin Islands, British',
4428 'VI': 'Virgin Islands, U.S.',
4429 'WF': 'Wallis and Futuna',
4430 'EH': 'Western Sahara',
4431 'YE': 'Yemen',
4432 'ZM': 'Zambia',
4433 'ZW': 'Zimbabwe',
4434 # Not ISO 3166 codes, but used for IP blocks
4435 'AP': 'Asia/Pacific Region',
4436 'EU': 'Europe',
4437 }
4438
4439 @classmethod
4440 def short2full(cls, code):
4441 """Convert an ISO 3166-2 country code to the corresponding full name"""
4442 return cls._country_map.get(code.upper())
4443
4444
4445 class GeoUtils:
4446 # Major IPv4 address blocks per country
4447 _country_ip_map = {
4448 'AD': '46.172.224.0/19',
4449 'AE': '94.200.0.0/13',
4450 'AF': '149.54.0.0/17',
4451 'AG': '209.59.64.0/18',
4452 'AI': '204.14.248.0/21',
4453 'AL': '46.99.0.0/16',
4454 'AM': '46.70.0.0/15',
4455 'AO': '105.168.0.0/13',
4456 'AP': '182.50.184.0/21',
4457 'AQ': '23.154.160.0/24',
4458 'AR': '181.0.0.0/12',
4459 'AS': '202.70.112.0/20',
4460 'AT': '77.116.0.0/14',
4461 'AU': '1.128.0.0/11',
4462 'AW': '181.41.0.0/18',
4463 'AX': '185.217.4.0/22',
4464 'AZ': '5.197.0.0/16',
4465 'BA': '31.176.128.0/17',
4466 'BB': '65.48.128.0/17',
4467 'BD': '114.130.0.0/16',
4468 'BE': '57.0.0.0/8',
4469 'BF': '102.178.0.0/15',
4470 'BG': '95.42.0.0/15',
4471 'BH': '37.131.0.0/17',
4472 'BI': '154.117.192.0/18',
4473 'BJ': '137.255.0.0/16',
4474 'BL': '185.212.72.0/23',
4475 'BM': '196.12.64.0/18',
4476 'BN': '156.31.0.0/16',
4477 'BO': '161.56.0.0/16',
4478 'BQ': '161.0.80.0/20',
4479 'BR': '191.128.0.0/12',
4480 'BS': '24.51.64.0/18',
4481 'BT': '119.2.96.0/19',
4482 'BW': '168.167.0.0/16',
4483 'BY': '178.120.0.0/13',
4484 'BZ': '179.42.192.0/18',
4485 'CA': '99.224.0.0/11',
4486 'CD': '41.243.0.0/16',
4487 'CF': '197.242.176.0/21',
4488 'CG': '160.113.0.0/16',
4489 'CH': '85.0.0.0/13',
4490 'CI': '102.136.0.0/14',
4491 'CK': '202.65.32.0/19',
4492 'CL': '152.172.0.0/14',
4493 'CM': '102.244.0.0/14',
4494 'CN': '36.128.0.0/10',
4495 'CO': '181.240.0.0/12',
4496 'CR': '201.192.0.0/12',
4497 'CU': '152.206.0.0/15',
4498 'CV': '165.90.96.0/19',
4499 'CW': '190.88.128.0/17',
4500 'CY': '31.153.0.0/16',
4501 'CZ': '88.100.0.0/14',
4502 'DE': '53.0.0.0/8',
4503 'DJ': '197.241.0.0/17',
4504 'DK': '87.48.0.0/12',
4505 'DM': '192.243.48.0/20',
4506 'DO': '152.166.0.0/15',
4507 'DZ': '41.96.0.0/12',
4508 'EC': '186.68.0.0/15',
4509 'EE': '90.190.0.0/15',
4510 'EG': '156.160.0.0/11',
4511 'ER': '196.200.96.0/20',
4512 'ES': '88.0.0.0/11',
4513 'ET': '196.188.0.0/14',
4514 'EU': '2.16.0.0/13',
4515 'FI': '91.152.0.0/13',
4516 'FJ': '144.120.0.0/16',
4517 'FK': '80.73.208.0/21',
4518 'FM': '119.252.112.0/20',
4519 'FO': '88.85.32.0/19',
4520 'FR': '90.0.0.0/9',
4521 'GA': '41.158.0.0/15',
4522 'GB': '25.0.0.0/8',
4523 'GD': '74.122.88.0/21',
4524 'GE': '31.146.0.0/16',
4525 'GF': '161.22.64.0/18',
4526 'GG': '62.68.160.0/19',
4527 'GH': '154.160.0.0/12',
4528 'GI': '95.164.0.0/16',
4529 'GL': '88.83.0.0/19',
4530 'GM': '160.182.0.0/15',
4531 'GN': '197.149.192.0/18',
4532 'GP': '104.250.0.0/19',
4533 'GQ': '105.235.224.0/20',
4534 'GR': '94.64.0.0/13',
4535 'GT': '168.234.0.0/16',
4536 'GU': '168.123.0.0/16',
4537 'GW': '197.214.80.0/20',
4538 'GY': '181.41.64.0/18',
4539 'HK': '113.252.0.0/14',
4540 'HN': '181.210.0.0/16',
4541 'HR': '93.136.0.0/13',
4542 'HT': '148.102.128.0/17',
4543 'HU': '84.0.0.0/14',
4544 'ID': '39.192.0.0/10',
4545 'IE': '87.32.0.0/12',
4546 'IL': '79.176.0.0/13',
4547 'IM': '5.62.80.0/20',
4548 'IN': '117.192.0.0/10',
4549 'IO': '203.83.48.0/21',
4550 'IQ': '37.236.0.0/14',
4551 'IR': '2.176.0.0/12',
4552 'IS': '82.221.0.0/16',
4553 'IT': '79.0.0.0/10',
4554 'JE': '87.244.64.0/18',
4555 'JM': '72.27.0.0/17',
4556 'JO': '176.29.0.0/16',
4557 'JP': '133.0.0.0/8',
4558 'KE': '105.48.0.0/12',
4559 'KG': '158.181.128.0/17',
4560 'KH': '36.37.128.0/17',
4561 'KI': '103.25.140.0/22',
4562 'KM': '197.255.224.0/20',
4563 'KN': '198.167.192.0/19',
4564 'KP': '175.45.176.0/22',
4565 'KR': '175.192.0.0/10',
4566 'KW': '37.36.0.0/14',
4567 'KY': '64.96.0.0/15',
4568 'KZ': '2.72.0.0/13',
4569 'LA': '115.84.64.0/18',
4570 'LB': '178.135.0.0/16',
4571 'LC': '24.92.144.0/20',
4572 'LI': '82.117.0.0/19',
4573 'LK': '112.134.0.0/15',
4574 'LR': '102.183.0.0/16',
4575 'LS': '129.232.0.0/17',
4576 'LT': '78.56.0.0/13',
4577 'LU': '188.42.0.0/16',
4578 'LV': '46.109.0.0/16',
4579 'LY': '41.252.0.0/14',
4580 'MA': '105.128.0.0/11',
4581 'MC': '88.209.64.0/18',
4582 'MD': '37.246.0.0/16',
4583 'ME': '178.175.0.0/17',
4584 'MF': '74.112.232.0/21',
4585 'MG': '154.126.0.0/17',
4586 'MH': '117.103.88.0/21',
4587 'MK': '77.28.0.0/15',
4588 'ML': '154.118.128.0/18',
4589 'MM': '37.111.0.0/17',
4590 'MN': '49.0.128.0/17',
4591 'MO': '60.246.0.0/16',
4592 'MP': '202.88.64.0/20',
4593 'MQ': '109.203.224.0/19',
4594 'MR': '41.188.64.0/18',
4595 'MS': '208.90.112.0/22',
4596 'MT': '46.11.0.0/16',
4597 'MU': '105.16.0.0/12',
4598 'MV': '27.114.128.0/18',
4599 'MW': '102.70.0.0/15',
4600 'MX': '187.192.0.0/11',
4601 'MY': '175.136.0.0/13',
4602 'MZ': '197.218.0.0/15',
4603 'NA': '41.182.0.0/16',
4604 'NC': '101.101.0.0/18',
4605 'NE': '197.214.0.0/18',
4606 'NF': '203.17.240.0/22',
4607 'NG': '105.112.0.0/12',
4608 'NI': '186.76.0.0/15',
4609 'NL': '145.96.0.0/11',
4610 'NO': '84.208.0.0/13',
4611 'NP': '36.252.0.0/15',
4612 'NR': '203.98.224.0/19',
4613 'NU': '49.156.48.0/22',
4614 'NZ': '49.224.0.0/14',
4615 'OM': '5.36.0.0/15',
4616 'PA': '186.72.0.0/15',
4617 'PE': '186.160.0.0/14',
4618 'PF': '123.50.64.0/18',
4619 'PG': '124.240.192.0/19',
4620 'PH': '49.144.0.0/13',
4621 'PK': '39.32.0.0/11',
4622 'PL': '83.0.0.0/11',
4623 'PM': '70.36.0.0/20',
4624 'PR': '66.50.0.0/16',
4625 'PS': '188.161.0.0/16',
4626 'PT': '85.240.0.0/13',
4627 'PW': '202.124.224.0/20',
4628 'PY': '181.120.0.0/14',
4629 'QA': '37.210.0.0/15',
4630 'RE': '102.35.0.0/16',
4631 'RO': '79.112.0.0/13',
4632 'RS': '93.86.0.0/15',
4633 'RU': '5.136.0.0/13',
4634 'RW': '41.186.0.0/16',
4635 'SA': '188.48.0.0/13',
4636 'SB': '202.1.160.0/19',
4637 'SC': '154.192.0.0/11',
4638 'SD': '102.120.0.0/13',
4639 'SE': '78.64.0.0/12',
4640 'SG': '8.128.0.0/10',
4641 'SI': '188.196.0.0/14',
4642 'SK': '78.98.0.0/15',
4643 'SL': '102.143.0.0/17',
4644 'SM': '89.186.32.0/19',
4645 'SN': '41.82.0.0/15',
4646 'SO': '154.115.192.0/18',
4647 'SR': '186.179.128.0/17',
4648 'SS': '105.235.208.0/21',
4649 'ST': '197.159.160.0/19',
4650 'SV': '168.243.0.0/16',
4651 'SX': '190.102.0.0/20',
4652 'SY': '5.0.0.0/16',
4653 'SZ': '41.84.224.0/19',
4654 'TC': '65.255.48.0/20',
4655 'TD': '154.68.128.0/19',
4656 'TG': '196.168.0.0/14',
4657 'TH': '171.96.0.0/13',
4658 'TJ': '85.9.128.0/18',
4659 'TK': '27.96.24.0/21',
4660 'TL': '180.189.160.0/20',
4661 'TM': '95.85.96.0/19',
4662 'TN': '197.0.0.0/11',
4663 'TO': '175.176.144.0/21',
4664 'TR': '78.160.0.0/11',
4665 'TT': '186.44.0.0/15',
4666 'TV': '202.2.96.0/19',
4667 'TW': '120.96.0.0/11',
4668 'TZ': '156.156.0.0/14',
4669 'UA': '37.52.0.0/14',
4670 'UG': '102.80.0.0/13',
4671 'US': '6.0.0.0/8',
4672 'UY': '167.56.0.0/13',
4673 'UZ': '84.54.64.0/18',
4674 'VA': '212.77.0.0/19',
4675 'VC': '207.191.240.0/21',
4676 'VE': '186.88.0.0/13',
4677 'VG': '66.81.192.0/20',
4678 'VI': '146.226.0.0/16',
4679 'VN': '14.160.0.0/11',
4680 'VU': '202.80.32.0/20',
4681 'WF': '117.20.32.0/21',
4682 'WS': '202.4.32.0/19',
4683 'YE': '134.35.0.0/16',
4684 'YT': '41.242.116.0/22',
4685 'ZA': '41.0.0.0/11',
4686 'ZM': '102.144.0.0/13',
4687 'ZW': '102.177.192.0/18',
4688 }
4689
4690 @classmethod
4691 def random_ipv4(cls, code_or_block):
4692 if len(code_or_block) == 2:
4693 block = cls._country_ip_map.get(code_or_block.upper())
4694 if not block:
4695 return None
4696 else:
4697 block = code_or_block
4698 addr, preflen = block.split('/')
4699 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4700 addr_max = addr_min | (0xffffffff >> int(preflen))
4701 return str(socket.inet_ntoa(
4702 struct.pack('!L', random.randint(addr_min, addr_max))))
4703
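# Illustrative example: generate a random IPv4 address that geolocates to a
# country, either by two-letter code or by an explicit CIDR block:
#   >>> GeoUtils.random_ipv4('DE')           # random address in 53.0.0.0/8
#   >>> GeoUtils.random_ipv4('2.16.0.0/13')  # random address in that block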
4704
4705 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4706 def __init__(self, proxies=None):
4707 # Set default handlers
4708 for type in ('http', 'https'):
4709 setattr(self, '%s_open' % type,
4710 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4711 meth(r, proxy, type))
4712 urllib.request.ProxyHandler.__init__(self, proxies)
4713
4714 def proxy_open(self, req, proxy, type):
4715 req_proxy = req.headers.get('Ytdl-request-proxy')
4716 if req_proxy is not None:
4717 proxy = req_proxy
4718 del req.headers['Ytdl-request-proxy']
4719
4720 if proxy == '__noproxy__':
4721 return None # No Proxy
4722 if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4723 req.add_header('Ytdl-socks-proxy', proxy)
4724 # yt-dlp's http/https handlers will wrap the socket with SOCKS
4725 return None
4726 return urllib.request.ProxyHandler.proxy_open(
4727 self, req, proxy, type)
4728
4729
4730 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4731 # released into Public Domain
4732 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4733
4734 def long_to_bytes(n, blocksize=0):
4735 """long_to_bytes(n:long, blocksize:int) : string
4736 Convert a long integer to a byte string.
4737
4738 If optional blocksize is given and greater than zero, pad the front of the
4739 byte string with binary zeros so that the length is a multiple of
4740 blocksize.
4741 """
4742 # after much testing, this algorithm was deemed to be the fastest
4743 s = b''
4744 n = int(n)
4745 while n > 0:
4746 s = struct.pack('>I', n & 0xffffffff) + s
4747 n = n >> 32
4748 # strip off leading zeros
4749 for i in range(len(s)):
4750 if s[i] != b'\000'[0]:
4751 break
4752 else:
4753 # only happens when n == 0
4754 s = b'\000'
4755 i = 0
4756 s = s[i:]
4757 # add back some pad bytes. this could be done more efficiently w.r.t. the
4758 # de-padding being done above, but sigh...
4759 if blocksize > 0 and len(s) % blocksize:
4760 s = (blocksize - len(s) % blocksize) * b'\000' + s
4761 return s
4762
4763
4764 def bytes_to_long(s):
4765 """bytes_to_long(string) : long
4766 Convert a byte string to a long integer.
4767
4768 This is (essentially) the inverse of long_to_bytes().
4769 """
4770 acc = 0
4771 length = len(s)
4772 if length % 4:
4773 extra = (4 - length % 4)
4774 s = b'\000' * extra + s
4775 length = length + extra
4776 for i in range(0, length, 4):
4777 acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4778 return acc
4779
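# Illustrative round trip:
#   >>> bytes_to_long(b'\x01\x00')
#   256
#   >>> long_to_bytes(256)
#   b'\x01\x00'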
4780
4781 def ohdave_rsa_encrypt(data, exponent, modulus):
4782 '''
4783 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4784
4785 Input:
4786 data: data to encrypt, bytes-like object
4787 exponent, modulus: parameter e and N of RSA algorithm, both integer
4788 Output: hex string of encrypted data
4789
4790 Limitation: supports one block encryption only
4791 '''
4792
4793 payload = int(binascii.hexlify(data[::-1]), 16)
4794 encrypted = pow(payload, exponent, modulus)
4795 return '%x' % encrypted
4796
4797
4798 def pkcs1pad(data, length):
4799 """
4800 Padding input data with PKCS#1 scheme
4801
4802 @param {int[]} data input data
4803 @param {int} length target length
4804 @returns {int[]} padded data
4805 """
4806 if len(data) > length - 11:
4807 raise ValueError('Input data too long for PKCS#1 padding')
4808
4809 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)] # PKCS#1 requires non-zero padding bytes
4810 return [0, 2] + pseudo_random + [0] + data
4811
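# Illustrative example: the result starts with [0, 2], has the payload after
# a zero separator, and is exactly `length` entries long:
#   >>> padded = pkcs1pad([1, 2, 3], 16)
#   >>> padded[:2], padded[-4:], len(padded)
#   ([0, 2], [0, 1, 2, 3], 16)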
4812
4813 def _base_n_table(n, table):
4814 if not table and not n:
4815 raise ValueError('Either table or n must be specified')
4816 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4817
4818 if n and n != len(table):
4819 raise ValueError(f'base {n} exceeds table length {len(table)}')
4820 return table
4821
4822
4823 def encode_base_n(num, n=None, table=None):
4824 """Convert given int to a base-n string"""
4825 table = _base_n_table(n, table)
4826 if not num:
4827 return table[0]
4828
4829 result, base = '', len(table)
4830 while num:
4831 result = table[num % base] + result
4832 num = num // base
4833 return result
4834
4835
4836 def decode_base_n(string, n=None, table=None):
4837 """Convert given base-n string to int"""
4838 table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4839 result, base = 0, len(table)
4840 for char in string:
4841 result = result * base + table[char]
4842 return result
4843
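# Illustrative round trip using the default table:
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> decode_base_n('ff', 16)
#   255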
4844
4845 def decode_base(value, digits):
4846 write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
4847 'and may be removed in a future version. Use yt_dlp.utils.decode_base_n instead')
4848 return decode_base_n(value, table=digits)
4849
4850
4851 def decode_packed_codes(code):
4852 mobj = re.search(PACKED_CODES_RE, code)
4853 obfuscated_code, base, count, symbols = mobj.groups()
4854 base = int(base)
4855 count = int(count)
4856 symbols = symbols.split('|')
4857 symbol_table = {}
4858
4859 while count:
4860 count -= 1
4861 base_n_count = encode_base_n(count, base)
4862 symbol_table[base_n_count] = symbols[count] or base_n_count
4863
4864 return re.sub(
4865 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4866 obfuscated_code)
4867
4868
4869 def caesar(s, alphabet, shift):
4870 if shift == 0:
4871 return s
4872 l = len(alphabet)
4873 return ''.join(
4874 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4875 for c in s)
4876
4877
4878 def rot47(s):
4879 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4880
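# Illustrative example: ROT47 rotates within the 94 printable ASCII
# characters and is its own inverse (47 + 47 = 94):
#   >>> rot47('Hello')
#   'w6==@'
#   >>> rot47(rot47('Hello'))
#   'Hello'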
4881
4882 def parse_m3u8_attributes(attrib):
4883 info = {}
4884 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4885 if val.startswith('"'):
4886 val = val[1:-1]
4887 info[key] = val
4888 return info
4889
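# Illustrative example: quoted values may contain commas; the quotes are stripped:
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}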
4890
4891 def urshift(val, n):
4892 return val >> n if val >= 0 else (val + 0x100000000) >> n
4893
4894
4895 # Based on png2str() written by @gdkchan and improved by @yokrysty
4896 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4897 def decode_png(png_data):
4898 # Reference: https://www.w3.org/TR/PNG/
4899 header = png_data[8:]
4900
4901 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4902 raise OSError('Not a valid PNG file.')
4903
4904 int_map = {1: '>B', 2: '>H', 4: '>I'}
4905 unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
4906
4907 chunks = []
4908
4909 while header:
4910 length = unpack_integer(header[:4])
4911 header = header[4:]
4912
4913 chunk_type = header[:4]
4914 header = header[4:]
4915
4916 chunk_data = header[:length]
4917 header = header[length:]
4918
4919 header = header[4:] # Skip CRC
4920
4921 chunks.append({
4922 'type': chunk_type,
4923 'length': length,
4924 'data': chunk_data
4925 })
4926
4927 ihdr = chunks[0]['data']
4928
4929 width = unpack_integer(ihdr[:4])
4930 height = unpack_integer(ihdr[4:8])
4931
4932 idat = b''
4933
4934 for chunk in chunks:
4935 if chunk['type'] == b'IDAT':
4936 idat += chunk['data']
4937
4938 if not idat:
4939 raise OSError('Unable to read PNG data.')
4940
4941 decompressed_data = bytearray(zlib.decompress(idat))
4942
4943 stride = width * 3
4944 pixels = []
4945
4946 def _get_pixel(idx):
4947 x = idx % stride
4948 y = idx // stride
4949 return pixels[y][x]
4950
4951 for y in range(height):
4952 basePos = y * (1 + stride)
4953 filter_type = decompressed_data[basePos]
4954
4955 current_row = []
4956
4957 pixels.append(current_row)
4958
4959 for x in range(stride):
4960 color = decompressed_data[1 + basePos + x]
4961 basex = y * stride + x
4962 left = 0
4963 up = 0
4964
4965 if x > 2:
4966 left = _get_pixel(basex - 3)
4967 if y > 0:
4968 up = _get_pixel(basex - stride)
4969
4970 if filter_type == 1: # Sub
4971 color = (color + left) & 0xff
4972 elif filter_type == 2: # Up
4973 color = (color + up) & 0xff
4974 elif filter_type == 3: # Average
4975 color = (color + ((left + up) >> 1)) & 0xff
4976 elif filter_type == 4: # Paeth
4977 a = left
4978 b = up
4979 c = 0
4980
4981 if x > 2 and y > 0:
4982 c = _get_pixel(basex - stride - 3)
4983
4984 p = a + b - c
4985
4986 pa = abs(p - a)
4987 pb = abs(p - b)
4988 pc = abs(p - c)
4989
4990 if pa <= pb and pa <= pc:
4991 color = (color + a) & 0xff
4992 elif pb <= pc:
4993 color = (color + b) & 0xff
4994 else:
4995 color = (color + c) & 0xff
4996
4997 current_row.append(color)
4998
4999 return width, height, pixels
5000
5001
5002 def write_xattr(path, key, value):
5003 # Windows: Write xattrs to NTFS Alternate Data Streams:
5004 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5005 if compat_os_name == 'nt':
5006 assert ':' not in key
5007 assert os.path.exists(path)
5008
5009 try:
5010 with open(f'{path}:{key}', 'wb') as f:
5011 f.write(value)
5012 except OSError as e:
5013 raise XAttrMetadataError(e.errno, e.strerror)
5014 return
5015
5016 # UNIX Method 1. Use xattrs/pyxattrs modules
5017
5018 setxattr = None
5019 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5020 # Unicode arguments are not supported in pyxattr until version 0.5.0
5021 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5022 if version_tuple(xattr.__version__) >= (0, 5, 0):
5023 setxattr = xattr.set
5024 elif xattr:
5025 setxattr = xattr.setxattr
5026
5027 if setxattr:
5028 try:
5029 setxattr(path, key, value)
5030 except OSError as e:
5031 raise XAttrMetadataError(e.errno, e.strerror)
5032 return
5033
5034 # UNIX Method 2. Use setfattr/xattr executables
5035 exe = ('setfattr' if check_executable('setfattr', ['--version'])
5036 else 'xattr' if check_executable('xattr', ['-h']) else None)
5037 if not exe:
5038 raise XAttrUnavailableError(
5039 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5040 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5041
5042 value = value.decode()
5043 try:
5044 _, stderr, returncode = Popen.run(
5045 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5046 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5047 except OSError as e:
5048 raise XAttrMetadataError(e.errno, e.strerror)
5049 if returncode:
5050 raise XAttrMetadataError(returncode, stderr)
5051
5052
5053 def random_birthday(year_field, month_field, day_field):
5054 start_date = datetime.date(1950, 1, 1)
5055 end_date = datetime.date(1995, 12, 31)
5056 offset = random.randint(0, (end_date - start_date).days)
5057 random_date = start_date + datetime.timedelta(offset)
5058 return {
5059 year_field: str(random_date.year),
5060 month_field: str(random_date.month),
5061 day_field: str(random_date.day),
5062 }
5063
5064
5065 # Templates for internet shortcut files, which are plain text files.
5066 DOT_URL_LINK_TEMPLATE = '''\
5067 [InternetShortcut]
5068 URL=%(url)s
5069 '''
5070
5071 DOT_WEBLOC_LINK_TEMPLATE = '''\
5072 <?xml version="1.0" encoding="UTF-8"?>
5073 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5074 <plist version="1.0">
5075 <dict>
5076 \t<key>URL</key>
5077 \t<string>%(url)s</string>
5078 </dict>
5079 </plist>
5080 '''
5081
5082 DOT_DESKTOP_LINK_TEMPLATE = '''\
5083 [Desktop Entry]
5084 Encoding=UTF-8
5085 Name=%(filename)s
5086 Type=Link
5087 URL=%(url)s
5088 Icon=text-html
5089 '''
5090
5091 LINK_TEMPLATES = {
5092 'url': DOT_URL_LINK_TEMPLATE,
5093 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5094 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5095 }
5096
5097
5098 def iri_to_uri(iri):
5099 """
5100 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5101
5102 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it only percent-escapes characters that are not escaped already, using their underlying UTF-8 encoding, so a valid URI passes through unchanged.
5103 """
5104
5105 iri_parts = urllib.parse.urlparse(iri)
5106
5107 if '[' in iri_parts.netloc:
5108 raise ValueError('IPv6 URIs are not yet supported.')
5109 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5110
5111 # The `safe` argument values used by the following code contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5112
5113 net_location = ''
5114 if iri_parts.username:
5115 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5116 if iri_parts.password is not None:
5117 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5118 net_location += '@'
5119
5120 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5121 # The 'idna' encoding produces ASCII text.
5122 if iri_parts.port is not None and iri_parts.port != 80:
5123 net_location += ':' + str(iri_parts.port)
5124
5125 return urllib.parse.urlunparse(
5126 (iri_parts.scheme,
5127 net_location,
5128
5129 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5130
5131 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5132 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5133
5134 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5135 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5136
5137 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5138
5139 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5140
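# Illustrative example (URL is a placeholder): the path is percent-encoded
# from its UTF-8 bytes, while already-escaped sequences are left intact:
#   >>> iri_to_uri('https://example.com/嗨')
#   'https://example.com/%E5%97%A8'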
5141
5142 def to_high_limit_path(path):
5143 if sys.platform in ['win32', 'cygwin']:
5144 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5145 return '\\\\?\\' + os.path.abspath(path)
5146
5147 return path
5148
5149
5150 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5151 val = traverse_obj(obj, *variadic(field))
5152 if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5153 return default
5154 return template % func(val)
5155
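# Illustrative examples:
#   >>> format_field({'width': 1920}, 'width', '%dpx')
#   '1920px'
#   >>> format_field({}, 'width', '%dpx', default='unknown')
#   'unknown'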
5156
5157 def clean_podcast_url(url):
5158 return re.sub(r'''(?x)
5159 (?:
5160 (?:
5161 chtbl\.com/track|
5162 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5163 play\.podtrac\.com
5164 )/[^/]+|
5165 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5166 flex\.acast\.com|
5167 pd(?:
5168 cn\.co| # https://podcorn.com/analytics-prefix/
5169 st\.fm # https://podsights.com/docs/
5170 )/e
5171 )/''', '', url)
5172
5173
5174 _HEX_TABLE = '0123456789abcdef'
5175
5176
5177 def random_uuidv4():
5178 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5179
5180
5181 def make_dir(path, to_screen=None):
5182 try:
5183 dn = os.path.dirname(path)
5184 if dn and not os.path.exists(dn):
5185 os.makedirs(dn)
5186 return True
5187 except OSError as err:
5188 if callable(to_screen):
5189 to_screen('unable to create directory ' + error_to_compat_str(err))
5190 return False
5191
5192
5193 def get_executable_path():
5194 from .update import _get_variant_and_executable_path
5195
5196 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5197
5198
5199 def load_plugins(name, suffix, namespace):
5200 classes = {}
5201 with contextlib.suppress(FileNotFoundError):
5202 plugins_spec = importlib.util.spec_from_file_location(
5203 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5204 plugins = importlib.util.module_from_spec(plugins_spec)
5205 sys.modules[plugins_spec.name] = plugins
5206 plugins_spec.loader.exec_module(plugins)
5207 for name in dir(plugins):
5208 if name in namespace:
5209 continue
5210 if not name.endswith(suffix):
5211 continue
5212 klass = getattr(plugins, name)
5213 classes[name] = namespace[name] = klass
5214 return classes
5215
5216
5217 def traverse_obj(
5218 obj, *path_list, default=None, expected_type=None, get_all=True,
5219 casesense=True, is_user_input=False, traverse_string=False):
5220 ''' Traverse nested list/dict/tuple
5221 @param path_list A list of paths which are checked one by one.
5222 Each path is a list of keys where each key is one of:
5223 - None: Do nothing
5224 - string: A dictionary key
5225 - int: An index into a list
5226 - tuple: A list of keys all of which will be traversed
5227 - Ellipsis: Fetch all values in the object
5228 - Function: Takes the key and value as arguments
5229 and returns whether the key matches or not
5230 @param default Default value to return
5231 @param expected_type Only accept final value of this type (Can also be any callable)
5232 @param get_all Return all the values obtained from a path or only the first one
5233 @param casesense Whether to consider dictionary keys as case sensitive
5234 @param is_user_input Whether the keys are generated from user input. If True,
5235 strings are converted to int/slice if necessary
5236 @param traverse_string Whether to traverse inside strings. If True, any
5237 non-compatible object will also be converted into a string
5238 # TODO: Write tests
5239 '''
5240 if not casesense:
5241 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
5242 path_list = (map(_lower, variadic(path)) for path in path_list)
5243
5244 def _traverse_obj(obj, path, _current_depth=0):
5245 nonlocal depth
5246 path = tuple(variadic(path))
5247 for i, key in enumerate(path):
5248 if None in (key, obj):
5249 return obj
5250 if isinstance(key, (list, tuple)):
5251 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5252 key = ...
5253 if key is ...:
5254 obj = (obj.values() if isinstance(obj, dict)
5255 else obj if isinstance(obj, (list, tuple, LazyList))
5256 else str(obj) if traverse_string else [])
5257 _current_depth += 1
5258 depth = max(depth, _current_depth)
5259 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5260 elif callable(key):
5261 if isinstance(obj, (list, tuple, LazyList)):
5262 obj = enumerate(obj)
5263 elif isinstance(obj, dict):
5264 obj = obj.items()
5265 else:
5266 if not traverse_string:
5267 return None
5268 obj = str(obj)
5269 _current_depth += 1
5270 depth = max(depth, _current_depth)
5271 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5272 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5273 obj = (obj.get(key) if casesense or (key in obj)
5274 else next((v for k, v in obj.items() if _lower(k) == key), None))
5275 else:
5276 if is_user_input:
5277 key = (int_or_none(key) if ':' not in key
5278 else slice(*map(int_or_none, key.split(':'))))
5279 if key == slice(None):
5280 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5281 if not isinstance(key, (int, slice)):
5282 return None
5283 if not isinstance(obj, (list, tuple, LazyList)):
5284 if not traverse_string:
5285 return None
5286 obj = str(obj)
5287 try:
5288 obj = obj[key]
5289 except IndexError:
5290 return None
5291 return obj
5292
5293 if isinstance(expected_type, type):
5294 type_test = lambda val: val if isinstance(val, expected_type) else None
5295 else:
5296 type_test = expected_type or IDENTITY
5297
5298 for path in path_list:
5299 depth = 0
5300 val = _traverse_obj(obj, path)
5301 if val is not None:
5302 if depth:
5303 for _ in range(depth - 1):
5304 val = itertools.chain.from_iterable(v for v in val if v is not None)
5305 val = [v for v in map(type_test, val) if v is not None]
5306 if val:
5307 return val if get_all else val[0]
5308 else:
5309 val = type_test(val)
5310 if val is not None:
5311 return val
5312 return default
5313
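# Illustrative examples (keys and values are placeholders):
#   >>> d = {'formats': [{'url': 'a'}, {'height': 720, 'url': 'b'}]}
#   >>> traverse_obj(d, ('formats', ..., 'url'))
#   ['a', 'b']
#   >>> traverse_obj(d, ('formats', 0, 'height'), ('formats', 1, 'height'))
#   720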
5314
5315 def traverse_dict(dictn, keys, casesense=True):
5316 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5317 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5318 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5319
5320
5321 def get_first(obj, keys, **kwargs):
5322 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5323
5324
5325 def variadic(x, allowed_types=(str, bytes, dict)):
5326 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5327
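# Illustrative examples: non-iterables and the "allowed" types are wrapped
# in a tuple, other iterables are returned unchanged:
#   >>> variadic('spam')
#   ('spam',)
#   >>> variadic(['spam', 'eggs'])
#   ['spam', 'eggs']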
5328
5329 def time_seconds(**kwargs):
5330 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5331 return t.timestamp()
5332
5333
5334 # Create a JSON Web Signature (JWS) with the HS256 algorithm
5335 # The resulting format is JWS Compact Serialization
5336 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5337 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5338 def jwt_encode_hs256(payload_data, key, headers={}):
5339 header_data = {
5340 'alg': 'HS256',
5341 'typ': 'JWT',
5342 }
5343 if headers:
5344 header_data.update(headers)
5345 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5346 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5347 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5348 signature_b64 = base64.b64encode(h.digest())
5349 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5350 return token
5351
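# Illustrative example (the key is a placeholder). Note that, as written,
# this uses standard rather than URL-safe base64 for the segments:
#   >>> token = jwt_encode_hs256({'sub': 'user'}, 'secret-key')
#   >>> token.count(b'.')  # header.payload.signature
#   2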
5352
5353 # Can be extended in the future to verify the signature, parse the header, and return the algorithm used if it's not HS256
5354 def jwt_decode_hs256(jwt):
5355 header_b64, payload_b64, signature_b64 = jwt.split('.')
5356 payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) # re-add any stripped base64 padding; superfluous '='s are ignored
5357 return payload_data
5358
5359
5360 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5361
5362
5363 @functools.cache
5364 def supports_terminal_sequences(stream):
5365 if compat_os_name == 'nt':
5366 if not WINDOWS_VT_MODE:
5367 return False
5368 elif not os.getenv('TERM'):
5369 return False
5370 try:
5371 return stream.isatty()
5372 except BaseException:
5373 return False
5374
5375
5376 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5377 if get_windows_version() < (10, 0, 10586):
5378 return
5379 global WINDOWS_VT_MODE
5380 try:
5381 Popen.run('', shell=True)
5382 except Exception:
5383 return
5384
5385 WINDOWS_VT_MODE = True
5386 supports_terminal_sequences.cache_clear()
5387
5388
5389 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5390
5391
5392 def remove_terminal_sequences(string):
5393 return _terminal_sequences_re.sub('', string)
5394
5395
5396 def number_of_digits(number):
5397 return len('%d' % number)
5398
5399
5400 def join_nonempty(*values, delim='-', from_dict=None):
5401 if from_dict is not None:
5402 values = (traverse_obj(from_dict, variadic(v)) for v in values)
5403 return delim.join(map(str, filter(None, values)))
5404
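# Illustrative example: falsy values are dropped before joining:
#   >>> join_nonempty('mp4', None, 1080, '', delim='-')
#   'mp4-1080'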
5405
5406 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5407 """
5408 Find the largest format dimensions in terms of video width and, for each thumbnail:
5409 * Modify the URL: Match the width with the provided regex and replace it with the largest format width
5410 * Update dimensions
5411
5412 This function is useful with video services that scale the provided thumbnails on demand
5413 """
5414 _keys = ('width', 'height')
5415 max_dimensions = max(
5416 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5417 default=(0, 0))
5418 if not max_dimensions[0]:
5419 return thumbnails
5420 return [
5421 merge_dicts(
5422 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5423 dict(zip(_keys, max_dimensions)), thumbnail)
5424 for thumbnail in thumbnails
5425 ]
5426
5427
5428 def parse_http_range(range):
5429 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5430 if not range:
5431 return None, None, None
5432 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5433 if not crg:
5434 return None, None, None
5435 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5436
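# Illustrative examples:
#   >>> parse_http_range('bytes=0-499/1234')
#   (0, 499, 1234)
#   >>> parse_http_range('bytes=500-')
#   (500, None, None)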
5437
5438 def read_stdin(what):
5439 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5440 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5441 return sys.stdin
5442
5443
5444 def determine_file_encoding(data):
5445 """
5446 Detect the text encoding used
5447 @returns (encoding, bytes to skip)
5448 """
5449
5450 # BOM marks are given priority over declarations
5451 for bom, enc in BOMS:
5452 if data.startswith(bom):
5453 return enc, len(bom)
5454
5455 # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5456 # We ignore the endianness to get a good enough match
5457 data = data.replace(b'\0', b'')
5458 mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5459 return mobj.group(1).decode() if mobj else None, 0
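
# Illustrative usage (a sketch; assumes BOMS above maps the UTF-8 BOM to 'utf-8'):
#   >>> determine_file_encoding(b'\xef\xbb\xbf--no-part')
#   ('utf-8', 3)
#   >>> determine_file_encoding(b'# coding: utf-8\n--proxy 127.0.0.1:3128')
#   ('utf-8', 0)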
5460
5461
5462 class Config:
5463 own_args = None
5464 parsed_args = None
5465 filename = None
5466 __initialized = False
5467
5468 def __init__(self, parser, label=None):
5469 self.parser, self.label = parser, label
5470 self._loaded_paths, self.configs = set(), []
5471
5472 def init(self, args=None, filename=None):
5473 assert not self.__initialized
5474 self.own_args, self.filename = args, filename
5475 return self.load_configs()
5476
5477 def load_configs(self):
5478 directory = ''
5479 if self.filename:
5480 location = os.path.realpath(self.filename)
5481 directory = os.path.dirname(location)
5482 if location in self._loaded_paths:
5483 return False
5484 self._loaded_paths.add(location)
5485
5486 self.__initialized = True
5487 opts, _ = self.parser.parse_known_args(self.own_args)
5488 self.parsed_args = self.own_args
5489 for location in opts.config_locations or []:
5490 if location == '-':
5491 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5492 continue
5493 location = os.path.join(directory, expand_path(location))
5494 if os.path.isdir(location):
5495 location = os.path.join(location, 'yt-dlp.conf')
5496 if not os.path.exists(location):
5497 self.parser.error(f'config location {location} does not exist')
5498 self.append_config(self.read_file(location), location)
5499 return True
5500
5501 def __str__(self):
5502 label = join_nonempty(
5503 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5504 delim=' ')
5505 return join_nonempty(
5506 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5507 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5508 delim='\n')
5509
5510 @staticmethod
5511 def read_file(filename, default=[]):
5512 try:
5513 optionf = open(filename, 'rb')
5514 except OSError:
5515 return default # silently skip if file is not present
5516 try:
5517 enc, skip = determine_file_encoding(optionf.read(512))
5518 optionf.seek(skip, io.SEEK_SET)
5519 except OSError:
5520 enc = None # silently skip read errors
5521 try:
5522 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5523 contents = optionf.read().decode(enc or preferredencoding())
5524 res = shlex.split(contents, comments=True)
5525 except Exception as err:
5526 raise ValueError(f'Unable to parse "{filename}": {err}')
5527 finally:
5528 optionf.close()
5529 return res
5530
5531 @staticmethod
5532 def hide_login_info(opts):
5533 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5534 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5535
5536 def _scrub_eq(o):
5537 m = eqre.match(o)
5538 if m:
5539 return m.group('key') + '=PRIVATE'
5540 else:
5541 return o
5542
5543 opts = list(map(_scrub_eq, opts))
5544 for idx, opt in enumerate(opts):
5545 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5546 opts[idx + 1] = 'PRIVATE'
5547 return opts
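
# Illustrative usage (a sketch): values following private options, and
# `--opt=value` forms, are both scrubbed
#   >>> Config.hide_login_info(['-u', 'name', '--video-password', 'pass', '-v'])
#   ['-u', 'PRIVATE', '--video-password', 'PRIVATE', '-v']
#   >>> Config.hide_login_info(['--username=name'])
#   ['--username=PRIVATE']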
5548
5549 def append_config(self, *args, label=None):
5550 config = type(self)(self.parser, label)
5551 config._loaded_paths = self._loaded_paths
5552 if config.init(*args):
5553 self.configs.append(config)
5554
5555 @property
5556 def all_args(self):
5557 for config in reversed(self.configs):
5558 yield from config.all_args
5559 yield from self.parsed_args or []
5560
5561 def parse_known_args(self, **kwargs):
5562 return self.parser.parse_known_args(self.all_args, **kwargs)
5563
5564 def parse_args(self):
5565 return self.parser.parse_args(self.all_args)
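
# Illustrative usage of Config (a sketch; assumes `parser` exposes an
# optparse-style parse_known_args/parse_args/error interface and defines a
# `config_locations` option):
#   root = Config(parser)
#   root.init(sys.argv[1:])  # also loads any --config-locations files recursively
#   opts, args = root.parse_known_args()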
5566
5567
5568 class WebSocketsWrapper:
5569 """Wraps the websockets module for use in non-async scopes"""
5570 pool = None
5571
5572 def __init__(self, url, headers=None, connect=True):
5573 self.loop = asyncio.new_event_loop()
5574 # XXX: "loop" is deprecated
5575 self.conn = websockets.connect(
5576 url, extra_headers=headers, ping_interval=None,
5577 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5578 if connect:
5579 self.__enter__()
5580 atexit.register(self.__exit__, None, None, None)
5581
5582 def __enter__(self):
5583 if not self.pool:
5584 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5585 return self
5586
5587 def send(self, *args):
5588 self.run_with_loop(self.pool.send(*args), self.loop)
5589
5590 def recv(self, *args):
5591 return self.run_with_loop(self.pool.recv(*args), self.loop)
5592
5593 def __exit__(self, type, value, traceback):
5594 try:
5595 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5596 finally:
5597 self._cancel_all_tasks(self.loop)  # must run before close(); a closed loop cannot cancel tasks
5598 self.loop.close()
5599
5600 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5601 # for contributors: if any new library that uses asyncio needs to be run in a non-async scope, move these functions out of this class
5602 @staticmethod
5603 def run_with_loop(main, loop):
5604 if not asyncio.iscoroutine(main):
5605 raise ValueError(f'a coroutine was expected, got {main!r}')
5606
5607 try:
5608 return loop.run_until_complete(main)
5609 finally:
5610 loop.run_until_complete(loop.shutdown_asyncgens())
5611 if hasattr(loop, 'shutdown_default_executor'):
5612 loop.run_until_complete(loop.shutdown_default_executor())
5613
5614 @staticmethod
5615 def _cancel_all_tasks(loop):
5616 to_cancel = asyncio.all_tasks(loop)
5617
5618 if not to_cancel:
5619 return
5620
5621 for task in to_cancel:
5622 task.cancel()
5623
5624 # XXX: "loop" is removed in python 3.10+
5625 loop.run_until_complete(
5626 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5627
5628 for task in to_cancel:
5629 if task.cancelled():
5630 continue
5631 if task.exception() is not None:
5632 loop.call_exception_handler({
5633 'message': 'unhandled exception during asyncio.run() shutdown',
5634 'exception': task.exception(),
5635 'task': task,
5636 })
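
# Illustrative usage (a sketch; the endpoint is hypothetical):
#   ws = WebSocketsWrapper('wss://example.com/socket', headers={'Origin': 'https://example.com'})
#   ws.send('{"op": "subscribe"}')
#   reply = ws.recv()
#   ws.__exit__(None, None, None)  # also registered via atexit as a fallback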
5637
5638
5639 def merge_headers(*dicts):
5640 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5641 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
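
# Illustrative usage (a sketch): keys are title-cased, so the casing of the
# incoming dicts does not matter
#   >>> merge_headers({'user-agent': 'UA1', 'accept': '*/*'}, {'User-Agent': 'UA2'})
#   {'User-Agent': 'UA2', 'Accept': '*/*'}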
5642
5643
5644 def cached_method(f):
5645 """Cache a method"""
5646 signature = inspect.signature(f)
5647
5648 @functools.wraps(f)
5649 def wrapper(self, *args, **kwargs):
5650 bound_args = signature.bind(self, *args, **kwargs)
5651 bound_args.apply_defaults()
5652 key = tuple(bound_args.arguments.values())
5653
5654 if not hasattr(self, '__cached_method__cache'):
5655 self.__cached_method__cache = {}
5656 cache = self.__cached_method__cache.setdefault(f.__name__, {})
5657 if key not in cache:
5658 cache[key] = f(self, *args, **kwargs)
5659 return cache[key]
5660 return wrapper
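
# Illustrative usage (a sketch; `Fetcher` is a hypothetical class):
#   class Fetcher:
#       @cached_method
#       def get(self, url):
#           ...  # expensive work; runs once per distinct (self, url)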
5661
5662
5663 class classproperty:
5664 """property access for class methods"""
5665
5666 def __init__(self, func):
5667 functools.update_wrapper(self, func)
5668 self.func = func
5669
5670 def __get__(self, _, cls):
5671 return self.func(cls)
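
# Illustrative usage (a sketch):
#   class Foo:
#       @classproperty
#       def name(cls):
#           return cls.__name__
#   # Foo.name == 'Foo'; also works on instances: Foo().name == 'Foo'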
5672
5673
5674 class Namespace(types.SimpleNamespace):
5675 """Immutable namespace"""
5676
5677 def __iter__(self):
5678 return iter(self.__dict__.values())
5679
5680 @property
5681 def items_(self):
5682 return self.__dict__.items()
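
# Illustrative usage (a sketch):
#   >>> ns = Namespace(a=1, b=2)
#   >>> list(ns)
#   [1, 2]
#   >>> dict(ns.items_)
#   {'a': 1, 'b': 2}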
5683
5684
5685 MEDIA_EXTENSIONS = Namespace(
5686 common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5687 video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5688 common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5689 audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
5690 thumbnails=('jpg', 'png', 'webp'),
5691 storyboards=('mhtml', ),
5692 subtitles=('srt', 'vtt', 'ass', 'lrc'),
5693 manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5694 )
5695 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5696 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5697
5698 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5699
5700
5701 class RetryManager:
5702 """Usage:
5703 for retry in RetryManager(...):
5704 try:
5705 ...
5706 except SomeException as err:
5707 retry.error = err
5708 continue
5709 """
5710 attempt, _error = 0, None
5711
5712 def __init__(self, _retries, _error_callback, **kwargs):
5713 self.retries = _retries or 0
5714 self.error_callback = functools.partial(_error_callback, **kwargs)
5715
5716 def _should_retry(self):
5717 return self._error is not NO_DEFAULT and self.attempt <= self.retries
5718
5719 @property
5720 def error(self):
5721 if self._error is NO_DEFAULT:
5722 return None
5723 return self._error
5724
5725 @error.setter
5726 def error(self, value):
5727 self._error = value
5728
5729 def __iter__(self):
5730 while self._should_retry():
5731 self.error = NO_DEFAULT
5732 self.attempt += 1
5733 yield self
5734 if self.error:
5735 self.error_callback(self.error, self.attempt, self.retries)
5736
5737 @staticmethod
5738 def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5739 """Utility function for reporting retries"""
5740 if count > retries:
5741 if error:
5742 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5743 raise e
5744
5745 if not count:
5746 return warn(e)
5747 elif isinstance(e, ExtractorError):
5748 e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')  # str(None) would be a truthy 'None'
5749 warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5750
5751 delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5752 if delay:
5753 info(f'Sleeping {delay:.2f} seconds ...')
5754 time.sleep(delay)
5755
5756
5757 def make_archive_id(ie, video_id):
5758 ie_key = ie if isinstance(ie, str) else ie.ie_key()
5759 return f'{ie_key.lower()} {video_id}'
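
# Illustrative usage (a sketch; also accepts an extractor class/instance via its ie_key()):
#   >>> make_archive_id('Youtube', 'dQw4w9WgXcQ')
#   'youtube dQw4w9WgXcQ'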
5760
5761
5762 # Deprecated
5763 has_certifi = bool(certifi)
5764 has_websockets = bool(websockets)