#!/usr/bin/env python3
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import urllib.parse
import xml.etree.ElementTree
import zlib

from .compat import (
    asyncio,
    compat_chr,
    compat_cookiejar,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_http_client,
    compat_HTTPError,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
)
from .dependencies import brotli, certifi, websockets
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
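
# Illustrative usage (a sketch, not part of the module; the namespace URI is
# made up for the example):
#
#   >>> xpath_with_ns('media:song/media:author', {'media': 'http://example.com/'})
#   '{http://example.com/}song/{http://example.com/}author'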


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_html_by_id(id, html):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(compat_HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        compat_HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
490 """
491 def find_or_raise(haystack, needle, exc):
492 try:
493 return haystack.index(needle)
494 except ValueError:
495 raise exc
496 closing_tag = f'</{tag}>'
497 whole_start = find_or_raise(
498 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
499 content_start = find_or_raise(
500 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
501 content_start += whole_start + 1
502 with HTMLBreakOnClosingTagParser() as parser:
503 parser.feed(html[whole_start:content_start])
504 if not parser.tagstack or parser.tagstack[0] != tag:
505 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
506 offset = content_start
507 while offset < len(html):
508 next_closing_tag_start = find_or_raise(
509 html[offset:], closing_tag,
510 compat_HTMLParseError(f'closing {tag} tag not found'))
511 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
512 try:
513 parser.feed(html[offset:offset + next_closing_tag_end])
514 offset += next_closing_tag_end
515 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
516 return html[content_start:offset + next_closing_tag_start], \
517 html[whole_start:offset + next_closing_tag_end]
518 raise compat_HTMLParseError('unexpected end of html')
519
520
521 class HTMLAttributeParser(compat_HTMLParser):
522 """Trivial HTML parser to gather the attributes for a single element"""
523
524 def __init__(self):
525 self.attrs = {}
526 compat_HTMLParser.__init__(self)
527
528 def handle_starttag(self, tag, attrs):
529 self.attrs = dict(attrs)
530
531
532 class HTMLListAttrsParser(compat_HTMLParser):
533 """HTML parser to gather the attributes for the elements of a list"""
534
535 def __init__(self):
536 compat_HTMLParser.__init__(self)
537 self.items = []
538 self._level = 0
539
540 def handle_starttag(self, tag, attrs):
541 if tag == 'li' and self._level == 0:
542 self.items.append(dict(attrs))
543 self._level += 1
544
545 def handle_endtag(self, tag):
546 self._level -= 1
547
548
549 def extract_attributes(html_element):
550 """Given a string for an HTML element such as
551 <el
552 a="foo" B="bar" c="&98;az" d=boz
553 empty= noval entity="&amp;"
554 sq='"' dq="'"
555 >
556 Decode and return a dictionary of attributes.
557 {
558 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
559 'empty': '', 'noval': None, 'entity': '&',
560 'sq': '"', 'dq': '\''
561 }.
562 """
563 parser = HTMLAttributeParser()
564 with contextlib.suppress(compat_HTMLParseError):
565 parser.feed(html_element)
566 parser.close()
567 return parser.attrs
568
569
570 def parse_list(webpage):
571 """Given a string for an series of HTML <li> elements,
572 return a dictionary of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
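
# For instance (illustrative only): clean_html('<p>Foo<br/>bar</p>') collapses
# the markup to 'Foo\nbar' -- <br> becomes a newline, remaining tags are
# stripped, and entities are decoded.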


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except LockingUnsupportedError:
                stream = open(filename, open_mode)
            return (stream, filename)
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
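
# A quick sketch of the expected behaviour (RFC 2822 date; value computed by hand):
#
#   >>> timeconvert('Wed, 02 Oct 2002 13:00:00 GMT')
#   1033563600
#   >>> timeconvert('not a date') is None
#   True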


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
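
# Illustrative behaviour (these values match the project's test-suite expectations):
#
#   >>> sanitize_filename('AT&T', restricted=True)
#   'AT_T'
#   >>> sanitize_filename('New World record at 0:12:34')
#   'New World record at 0_12_34'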


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
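
# On non-Windows platforms (without force=True) the path is returned untouched;
# on Windows, characters that are invalid in a path component are replaced with
# '#', e.g. (illustrative): 'abc|def' -> 'abc#def'.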


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
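
# A small sketch of what this returns (the credentials are made up):
#
#   >>> extract_basic_auth('http://user:pass@example.com/x')
#   ('http://example.com/x', 'Basic dXNlcjpwYXNz')
#   >>> extract_basic_auth('http://example.com/x')
#   ('http://example.com/x', None)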


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
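
# Example output (illustrative):
#
#   >>> formatSeconds(3661.5, msec=True)
#   '1:01:01.500'
#   >>> formatSeconds(45)
#   '45'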


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    # Create a new context to discard any certificates that were already loaded
                    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                    context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
           'filling out the appropriate issue template. '
           'Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers
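
# For example (illustrative), given
#     {'Youtubedl-no-compression': 'True', 'Accept-Encoding': 'gzip', 'User-Agent': 'UA'}
# the result is {'User-Agent': 'UA'}: the Accept-Encoding header is dropped and
# the internal marker header is removed before the request is sent.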


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)


def extract_timezone(date_str):
    m = re.search(
        r'''(?x)
            ^.{8,}?                                          # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                        # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|               # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                         # optional space
                (?P<sign>\+|-)                               # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})   # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    with contextlib.suppress(ValueError):
        date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
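
# Sketch of the expected results (values computed by hand; 2022-01-01T00:00:00Z
# is 1640995200):
#
#   >>> parse_iso8601('2022-01-01T12:00:00Z')
#   1641038400
#   >>> parse_iso8601('2022-01-01T12:00:00+05:30')
#   1641018600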


def date_formats(day_first=True):
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return compat_str(upload_date)
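
# Illustrative conversions (matching the project's test-suite expectations):
#
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('1968-12-10')
#   '19681210'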


def unified_timestamp(date_str, day_first=True):
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
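
# Illustrative timestamps (values computed by hand, in the style of the
# project's tests):
#
#   >>> unified_timestamp('December 15, 2017 at 7:49 am')
#   1513324140
#   >>> unified_timestamp('2018-03-14T08:32:43.1493874+00:00')
#   1521016363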


def determine_ext(url, default_ext='unknown_video'):
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext
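
# Expected guesses (illustrative):
#
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'
#   >>> determine_ext('http://example.com/play')
#   'unknown_video'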
1702
1703
1704 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1705 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1706
1707
1708 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1709 """
1710 Return a datetime object from a string in the format YYYYMMDD or
1711 (now|today|yesterday|date)[+-][0-9]+(microsecond|second|minute|hour|day|week|month|year)(s)?
1712
1713 format: string date format used to parse date_str
1714 precision: round the time portion of a datetime object.
1715 auto|microsecond|second|minute|hour|day.
1716 auto: round to the unit provided in date_str (if applicable).
1717 """
1718 auto_precision = False
1719 if precision == 'auto':
1720 auto_precision = True
1721 precision = 'microsecond'
1722 today = datetime_round(datetime.datetime.utcnow(), precision)
1723 if date_str in ('now', 'today'):
1724 return today
1725 if date_str == 'yesterday':
1726 return today - datetime.timedelta(days=1)
1727 match = re.match(
1728 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
1729 date_str)
1730 if match is not None:
1731 start_time = datetime_from_str(match.group('start'), precision, format)
1732 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1733 unit = match.group('unit')
1734 if unit == 'month' or unit == 'year':
1735 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1736 unit = 'day'
1737 else:
1738 if unit == 'week':
1739 unit = 'day'
1740 time *= 7
1741 delta = datetime.timedelta(**{unit + 's': time})
1742 new_date = start_time + delta
1743 if auto_precision:
1744 return datetime_round(new_date, unit)
1745 return new_date
1746
1747 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1748
1749
1750 def date_from_str(date_str, format='%Y%m%d', strict=False):
1751 """
1752 Return a datetime object from a string in the format YYYYMMDD or
1753 (now|today|yesterday|date)[+-][0-9]+(microsecond|second|minute|hour|day|week|month|year)(s)?
1754
1755 If "strict", only YYYYMMDD or (now|today)[+-][0-9]+(day|week|month|year)(s)? is allowed
1756
1757 format: string date format used to parse date_str
1758 """
1759 if strict and not re.fullmatch(r'\d{8}|(now|today)[+-]\d+(day|week|month|year)(s)?', date_str):
1760 raise ValueError(f'Invalid date format {date_str}')
1761 return datetime_from_str(date_str, precision='microsecond', format=format).date()
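
# Illustrative examples (relative forms are evaluated against UTC "now"):
#   date_from_str('20200229')   # -> datetime.date(2020, 2, 29)
#   date_from_str('now-1week')  # -> the calendar date of 7 days ago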
1762
1763
1764 def datetime_add_months(dt, months):
1765 """Increment/Decrement a datetime object by months."""
1766 month = dt.month + months - 1
1767 year = dt.year + month // 12
1768 month = month % 12 + 1
1769 day = min(dt.day, calendar.monthrange(year, month)[1])
1770 return dt.replace(year, month, day)
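
# Illustrative examples (note the day is clamped to the target month's length):
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)   # -> 2020-02-29 00:00:00
#   datetime_add_months(datetime.datetime(2020, 1, 15), -2)  # -> 2019-11-15 00:00:00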
1771
1772
1773 def datetime_round(dt, precision='day'):
1774 """
1775 Round a datetime object's time to a specific precision
1776 """
1777 if precision == 'microsecond':
1778 return dt
1779
1780 unit_seconds = {
1781 'day': 86400,
1782 'hour': 3600,
1783 'minute': 60,
1784 'second': 1,
1785 }
1786 roundto = lambda x, n: ((x + n / 2) // n) * n
1787 timestamp = calendar.timegm(dt.timetuple())
1788 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
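
# Illustrative examples (half-way values round up):
#   datetime_round(datetime.datetime(2020, 1, 1, 11, 59, 59), 'hour')  # -> 2020-01-01 12:00:00
#   datetime_round(datetime.datetime(2020, 1, 1, 12, 0, 0), 'day')     # -> 2020-01-02 00:00:00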
1789
1790
1791 def hyphenate_date(date_str):
1792 """
1793 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1794 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1795 if match is not None:
1796 return '-'.join(match.groups())
1797 else:
1798 return date_str
1799
1800
1801 class DateRange:
1802 """Represents a time interval between two dates"""
1803
1804 def __init__(self, start=None, end=None):
1805 """start and end must be strings in the format accepted by date"""
1806 if start is not None:
1807 self.start = date_from_str(start, strict=True)
1808 else:
1809 self.start = datetime.datetime.min.date()
1810 if end is not None:
1811 self.end = date_from_str(end, strict=True)
1812 else:
1813 self.end = datetime.datetime.max.date()
1814 if self.start > self.end:
1815 raise ValueError('Date range: "%s"; the start date must be before the end date' % self)
1816
1817 @classmethod
1818 def day(cls, day):
1819 """Returns a range that only contains the given day"""
1820 return cls(day, day)
1821
1822 def __contains__(self, date):
1823 """Check if the date is in the range"""
1824 if not isinstance(date, datetime.date):
1825 date = date_from_str(date)
1826 return self.start <= date <= self.end
1827
1828 def __str__(self):
1829 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1830
1831
1832 def platform_name():
1833 """ Returns the platform name as a compat_str """
1834 res = platform.platform()
1835 if isinstance(res, bytes):
1836 res = res.decode(preferredencoding())
1837
1838 assert isinstance(res, compat_str)
1839 return res
1840
1841
1842 def get_windows_version():
1843 ''' Get Windows version. None if it's not running on Windows '''
1844 if compat_os_name == 'nt':
1845 return version_tuple(platform.win32_ver()[1])
1846 else:
1847 return None
1848
1849
1850 def write_string(s, out=None, encoding=None):
1851 assert isinstance(s, str)
1852 out = out or sys.stderr
1853
1854 if 'b' in getattr(out, 'mode', ''):
1855 byt = s.encode(encoding or preferredencoding(), 'ignore')
1856 out.write(byt)
1857 elif hasattr(out, 'buffer'):
1858 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1859 byt = s.encode(enc, 'ignore')
1860 out.buffer.write(byt)
1861 else:
1862 out.write(s)
1863 out.flush()
1864
1865
1866 def bytes_to_intlist(bs):
1867 if not bs:
1868 return []
1869 if isinstance(bs[0], int): # Python 3
1870 return list(bs)
1871 else:
1872 return [ord(c) for c in bs]
1873
1874
1875 def intlist_to_bytes(xs):
1876 if not xs:
1877 return b''
1878 return compat_struct_pack('%dB' % len(xs), *xs)
1879
1880
1881 class LockingUnsupportedError(IOError):
1882 msg = 'File locking is not supported on this platform'
1883
1884 def __init__(self):
1885 super().__init__(self.msg)
1886
1887
1888 # Cross-platform file locking
1889 if sys.platform == 'win32':
1890 import ctypes.wintypes
1891 import msvcrt
1892
1893 class OVERLAPPED(ctypes.Structure):
1894 _fields_ = [
1895 ('Internal', ctypes.wintypes.LPVOID),
1896 ('InternalHigh', ctypes.wintypes.LPVOID),
1897 ('Offset', ctypes.wintypes.DWORD),
1898 ('OffsetHigh', ctypes.wintypes.DWORD),
1899 ('hEvent', ctypes.wintypes.HANDLE),
1900 ]
1901
1902 kernel32 = ctypes.windll.kernel32
1903 LockFileEx = kernel32.LockFileEx
1904 LockFileEx.argtypes = [
1905 ctypes.wintypes.HANDLE, # hFile
1906 ctypes.wintypes.DWORD, # dwFlags
1907 ctypes.wintypes.DWORD, # dwReserved
1908 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1909 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1910 ctypes.POINTER(OVERLAPPED) # Overlapped
1911 ]
1912 LockFileEx.restype = ctypes.wintypes.BOOL
1913 UnlockFileEx = kernel32.UnlockFileEx
1914 UnlockFileEx.argtypes = [
1915 ctypes.wintypes.HANDLE, # hFile
1916 ctypes.wintypes.DWORD, # dwReserved
1917 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1918 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1919 ctypes.POINTER(OVERLAPPED) # Overlapped
1920 ]
1921 UnlockFileEx.restype = ctypes.wintypes.BOOL
1922 whole_low = 0xffffffff
1923 whole_high = 0x7fffffff
1924
1925 def _lock_file(f, exclusive, block):
1926 overlapped = OVERLAPPED()
1927 overlapped.Offset = 0
1928 overlapped.OffsetHigh = 0
1929 overlapped.hEvent = 0
1930 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
1931
1932 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
1933 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
1934 0, whole_low, whole_high, f._lock_file_overlapped_p):
1935 raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
1936
1937 def _unlock_file(f):
1938 assert f._lock_file_overlapped_p
1939 handle = msvcrt.get_osfhandle(f.fileno())
1940 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
1941 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1942
1943 else:
1944 try:
1945 import fcntl
1946
1947 def _lock_file(f, exclusive, block):
1948 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
1949 if not block:
1950 flags |= fcntl.LOCK_NB
1951 try:
1952 fcntl.flock(f, flags)
1953 except BlockingIOError:
1954 raise
1955 except OSError: # AOSP does not have flock()
1956 fcntl.lockf(f, flags)
1957
1958 def _unlock_file(f):
1959 try:
1960 fcntl.flock(f, fcntl.LOCK_UN)
1961 except OSError:
1962 fcntl.lockf(f, fcntl.LOCK_UN)
1963
1964 except ImportError:
1965
1966 def _lock_file(f, exclusive, block):
1967 raise LockingUnsupportedError()
1968
1969 def _unlock_file(f):
1970 raise LockingUnsupportedError()
1971
1972
1973 class locked_file:
1974 locked = False
1975
1976 def __init__(self, filename, mode, block=True, encoding=None):
1977 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
1978 raise NotImplementedError(mode)
1979 self.mode, self.block = mode, block
1980
1981 writable = any(f in mode for f in 'wax+')
1982 readable = any(f in mode for f in 'r+')
1983 flags = functools.reduce(operator.ior, (
1984 getattr(os, 'O_CLOEXEC', 0), # UNIX only
1985 getattr(os, 'O_BINARY', 0), # Windows only
1986 getattr(os, 'O_NOINHERIT', 0), # Windows only
1987 os.O_CREAT if writable else 0, # O_TRUNC only after locking
1988 os.O_APPEND if 'a' in mode else 0,
1989 os.O_EXCL if 'x' in mode else 0,
1990 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
1991 ))
1992
1993 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
1994
1995 def __enter__(self):
1996 exclusive = 'r' not in self.mode
1997 try:
1998 _lock_file(self.f, exclusive, self.block)
1999 self.locked = True
2000 except OSError:
2001 self.f.close()
2002 raise
2003 if 'w' in self.mode:
2004 self.f.truncate()
2005 return self
2006
2007 def unlock(self):
2008 if not self.locked:
2009 return
2010 try:
2011 _unlock_file(self.f)
2012 finally:
2013 self.locked = False
2014
2015 def __exit__(self, *_):
2016 try:
2017 self.unlock()
2018 finally:
2019 self.f.close()
2020
2021 open = __enter__
2022 close = __exit__
2023
2024 def __getattr__(self, attr):
2025 return getattr(self.f, attr)
2026
2027 def __iter__(self):
2028 return iter(self.f)
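
# Illustrative usage ('state.txt' is a made-up filename): take an exclusive
# lock for writing; the lock is released and the file closed on exit:
#   with locked_file('state.txt', 'w', block=True) as f:
#       f.write('...')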
2029
2030
2031 def get_filesystem_encoding():
2032 encoding = sys.getfilesystemencoding()
2033 return encoding if encoding is not None else 'utf-8'
2034
2035
2036 def shell_quote(args):
2037 quoted_args = []
2038 encoding = get_filesystem_encoding()
2039 for a in args:
2040 if isinstance(a, bytes):
2041 # We may get a filename encoded with 'encodeFilename'
2042 a = a.decode(encoding)
2043 quoted_args.append(compat_shlex_quote(a))
2044 return ' '.join(quoted_args)
2045
2046
2047 def smuggle_url(url, data):
2048 """ Pass additional data in a URL for internal use. """
2049
2050 url, idata = unsmuggle_url(url, {})
2051 data.update(idata)
2052 sdata = compat_urllib_parse_urlencode(
2053 {'__youtubedl_smuggle': json.dumps(data)})
2054 return url + '#' + sdata
2055
2056
2057 def unsmuggle_url(smug_url, default=None):
2058 if '#__youtubedl_smuggle' not in smug_url:
2059 return smug_url, default
2060 url, _, sdata = smug_url.rpartition('#')
2061 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2062 data = json.loads(jsond)
2063 return url, data
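
# Illustrative round trip (the URL fragment carries the smuggled JSON):
#   url = smuggle_url('http://example.com/video', {'referer': 'http://example.com/'})
#   unsmuggle_url(url)  # -> ('http://example.com/video', {'referer': 'http://example.com/'})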
2064
2065
2066 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2067 """ Formats numbers with decimal suffixes like K, M, etc """
2068 num, factor = float_or_none(num), float(factor)
2069 if num is None or num < 0:
2070 return None
2071 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2072 exponent = 0 if num == 0 else max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))  # never negative for num < 1
2073 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2074 if factor == 1024:
2075 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2076 converted = num / (factor ** exponent)
2077 return fmt % (converted, suffix)
2078
2079
2080 def format_bytes(bytes):
2081 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
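
# Illustrative examples (factor 1024, so the suffixes are binary ones):
#   format_bytes(2000)     # -> '1.95KiB'
#   format_bytes(1536000)  # -> '1.46MiB'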
2082
2083
2084 def lookup_unit_table(unit_table, s):
2085 units_re = '|'.join(re.escape(u) for u in unit_table)
2086 m = re.match(
2087 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2088 if not m:
2089 return None
2090 num_str = m.group('num').replace(',', '.')
2091 mult = unit_table[m.group('unit')]
2092 return int(float(num_str) * mult)
2093
2094
2095 def parse_filesize(s):
2096 if s is None:
2097 return None
2098
2099 # The lower-case forms are of course incorrect and unofficial,
2100 # but we support those too
2101 _UNIT_TABLE = {
2102 'B': 1,
2103 'b': 1,
2104 'bytes': 1,
2105 'KiB': 1024,
2106 'KB': 1000,
2107 'kB': 1024,
2108 'Kb': 1000,
2109 'kb': 1000,
2110 'kilobytes': 1000,
2111 'kibibytes': 1024,
2112 'MiB': 1024 ** 2,
2113 'MB': 1000 ** 2,
2114 'mB': 1024 ** 2,
2115 'Mb': 1000 ** 2,
2116 'mb': 1000 ** 2,
2117 'megabytes': 1000 ** 2,
2118 'mebibytes': 1024 ** 2,
2119 'GiB': 1024 ** 3,
2120 'GB': 1000 ** 3,
2121 'gB': 1024 ** 3,
2122 'Gb': 1000 ** 3,
2123 'gb': 1000 ** 3,
2124 'gigabytes': 1000 ** 3,
2125 'gibibytes': 1024 ** 3,
2126 'TiB': 1024 ** 4,
2127 'TB': 1000 ** 4,
2128 'tB': 1024 ** 4,
2129 'Tb': 1000 ** 4,
2130 'tb': 1000 ** 4,
2131 'terabytes': 1000 ** 4,
2132 'tebibytes': 1024 ** 4,
2133 'PiB': 1024 ** 5,
2134 'PB': 1000 ** 5,
2135 'pB': 1024 ** 5,
2136 'Pb': 1000 ** 5,
2137 'pb': 1000 ** 5,
2138 'petabytes': 1000 ** 5,
2139 'pebibytes': 1024 ** 5,
2140 'EiB': 1024 ** 6,
2141 'EB': 1000 ** 6,
2142 'eB': 1024 ** 6,
2143 'Eb': 1000 ** 6,
2144 'eb': 1000 ** 6,
2145 'exabytes': 1000 ** 6,
2146 'exbibytes': 1024 ** 6,
2147 'ZiB': 1024 ** 7,
2148 'ZB': 1000 ** 7,
2149 'zB': 1024 ** 7,
2150 'Zb': 1000 ** 7,
2151 'zb': 1000 ** 7,
2152 'zettabytes': 1000 ** 7,
2153 'zebibytes': 1024 ** 7,
2154 'YiB': 1024 ** 8,
2155 'YB': 1000 ** 8,
2156 'yB': 1024 ** 8,
2157 'Yb': 1000 ** 8,
2158 'yb': 1000 ** 8,
2159 'yottabytes': 1000 ** 8,
2160 'yobibytes': 1024 ** 8,
2161 }
2162
2163 return lookup_unit_table(_UNIT_TABLE, s)
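
# Illustrative examples (decimal vs binary units; comma accepted as decimal separator):
#   parse_filesize('5 MB')     # -> 5000000
#   parse_filesize('1.2MiB')   # -> 1258291
#   parse_filesize('1,24 KB')  # -> 1240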
2164
2165
2166 def parse_count(s):
2167 if s is None:
2168 return None
2169
2170 s = re.sub(r'^[^\d]+\s', '', s).strip()
2171
2172 if re.match(r'^[\d,.]+$', s):
2173 return str_to_int(s)
2174
2175 _UNIT_TABLE = {
2176 'k': 1000,
2177 'K': 1000,
2178 'm': 1000 ** 2,
2179 'M': 1000 ** 2,
2180 'kk': 1000 ** 2,
2181 'KK': 1000 ** 2,
2182 'b': 1000 ** 3,
2183 'B': 1000 ** 3,
2184 }
2185
2186 ret = lookup_unit_table(_UNIT_TABLE, s)
2187 if ret is not None:
2188 return ret
2189
2190 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2191 if mobj:
2192 return str_to_int(mobj.group(1))
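
# Illustrative examples:
#   parse_count('1.8M')         # -> 1800000
#   parse_count('1,234 views')  # -> 1234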
2193
2194
2195 def parse_resolution(s, *, lenient=False):
2196 if s is None:
2197 return {}
2198
2199 if lenient:
2200 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2201 else:
2202 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2203 if mobj:
2204 return {
2205 'width': int(mobj.group('w')),
2206 'height': int(mobj.group('h')),
2207 }
2208
2209 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2210 if mobj:
2211 return {'height': int(mobj.group(1))}
2212
2213 mobj = re.search(r'\b([48])[kK]\b', s)
2214 if mobj:
2215 return {'height': int(mobj.group(1)) * 540}
2216
2217 return {}
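
# Illustrative examples:
#   parse_resolution('1920x1080')  # -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       # -> {'height': 720}
#   parse_resolution('4k')         # -> {'height': 2160}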
2218
2219
2220 def parse_bitrate(s):
2221 if not isinstance(s, compat_str):
2222 return
2223 mobj = re.search(r'\b(\d+)\s*kbps', s)
2224 if mobj:
2225 return int(mobj.group(1))
2226
2227
2228 def month_by_name(name, lang='en'):
2229 """ Return the number of a month by its (locale-independent) name """
2230
2231 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2232
2233 try:
2234 return month_names.index(name) + 1
2235 except ValueError:
2236 return None
2237
2238
2239 def month_by_abbreviation(abbrev):
2240 """ Return the number of a month by its (locale-independent) English
2241 abbreviation """
2242
2243 try:
2244 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2245 except ValueError:
2246 return None
2247
2248
2249 def fix_xml_ampersands(xml_str):
2250 """Replace all the '&' by '&amp;' in XML"""
2251 return re.sub(
2252 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2253 '&amp;',
2254 xml_str)
2255
2256
2257 def setproctitle(title):
2258 assert isinstance(title, compat_str)
2259
2260 # ctypes in Jython is not complete
2261 # http://bugs.jython.org/issue2148
2262 if sys.platform.startswith('java'):
2263 return
2264
2265 try:
2266 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2267 except OSError:
2268 return
2269 except TypeError:
2270 # LoadLibrary in Windows Python 2.7.13 only expects
2271 # a bytestring, but since unicode_literals turns
2272 # every string into a unicode string, it fails.
2273 return
2274 title_bytes = title.encode('utf-8')
2275 buf = ctypes.create_string_buffer(len(title_bytes))
2276 buf.value = title_bytes
2277 try:
2278 libc.prctl(15, buf, 0, 0, 0)
2279 except AttributeError:
2280 return # Strange libc, just skip this
2281
2282
2283 def remove_start(s, start):
2284 return s[len(start):] if s is not None and s.startswith(start) else s
2285
2286
2287 def remove_end(s, end):
2288 return s[:-len(end)] if s is not None and s.endswith(end) else s
2289
2290
2291 def remove_quotes(s):
2292 if s is None or len(s) < 2:
2293 return s
2294 for quote in ('"', "'", ):
2295 if s[0] == quote and s[-1] == quote:
2296 return s[1:-1]
2297 return s
2298
2299
2300 def get_domain(url):
2301 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2302 return domain.group('domain') if domain else None
2303
2304
2305 def url_basename(url):
2306 path = compat_urlparse.urlparse(url).path
2307 return path.strip('/').split('/')[-1]
2308
2309
2310 def base_url(url):
2311 return re.match(r'https?://[^?#&]+/', url).group()
2312
2313
2314 def urljoin(base, path):
2315 if isinstance(path, bytes):
2316 path = path.decode('utf-8')
2317 if not isinstance(path, compat_str) or not path:
2318 return None
2319 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2320 return path
2321 if isinstance(base, bytes):
2322 base = base.decode('utf-8')
2323 if not isinstance(base, compat_str) or not re.match(
2324 r'^(?:https?:)?//', base):
2325 return None
2326 return compat_urlparse.urljoin(base, path)
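
# Illustrative examples (protocol-relative paths are returned unchanged):
#   urljoin('https://example.com/a/', 'b/c')                  # -> 'https://example.com/a/b/c'
#   urljoin('https://example.com/a/', '//cdn.example.com/x')  # -> '//cdn.example.com/x'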
2327
2328
2329 class HEADRequest(compat_urllib_request.Request):
2330 def get_method(self):
2331 return 'HEAD'
2332
2333
2334 class PUTRequest(compat_urllib_request.Request):
2335 def get_method(self):
2336 return 'PUT'
2337
2338
2339 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2340 if get_attr and v is not None:
2341 v = getattr(v, get_attr, None)
2342 try:
2343 return int(v) * invscale // scale
2344 except (ValueError, TypeError, OverflowError):
2345 return default
2346
2347
2348 def str_or_none(v, default=None):
2349 return default if v is None else compat_str(v)
2350
2351
2352 def str_to_int(int_str):
2353 """ A more relaxed version of int_or_none """
2354 if isinstance(int_str, int):
2355 return int_str
2356 elif isinstance(int_str, compat_str):
2357 int_str = re.sub(r'[,\.\+]', '', int_str)
2358 return int_or_none(int_str)
2359
2360
2361 def float_or_none(v, scale=1, invscale=1, default=None):
2362 if v is None:
2363 return default
2364 try:
2365 return float(v) * invscale / scale
2366 except (ValueError, TypeError):
2367 return default
2368
2369
2370 def bool_or_none(v, default=None):
2371 return v if isinstance(v, bool) else default
2372
2373
2374 def strip_or_none(v, default=None):
2375 return v.strip() if isinstance(v, compat_str) else default
2376
2377
2378 def url_or_none(url):
2379 if not url or not isinstance(url, compat_str):
2380 return None
2381 url = url.strip()
2382 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2383
2384
2385 def request_to_url(req):
2386 if isinstance(req, compat_urllib_request.Request):
2387 return req.get_full_url()
2388 else:
2389 return req
2390
2391
2392 def strftime_or_none(timestamp, date_format, default=None):
2393 datetime_object = None
2394 try:
2395 if isinstance(timestamp, (int, float)): # unix timestamp
2396 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2397 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2398 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2399 return datetime_object.strftime(date_format)
2400 except (ValueError, TypeError, AttributeError):
2401 return default
2402
2403
2404 def parse_duration(s):
2405 if not isinstance(s, str):
2406 return None
2407 s = s.strip()
2408 if not s:
2409 return None
2410
2411 days, hours, mins, secs, ms = [None] * 5
2412 m = re.match(r'''(?x)
2413 (?P<before_secs>
2414 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2415 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2416 (?P<ms>[.:][0-9]+)?Z?$
2417 ''', s)
2418 if m:
2419 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2420 else:
2421 m = re.match(
2422 r'''(?ix)(?:P?
2423 (?:
2424 [0-9]+\s*y(?:ears?)?,?\s*
2425 )?
2426 (?:
2427 [0-9]+\s*m(?:onths?)?,?\s*
2428 )?
2429 (?:
2430 [0-9]+\s*w(?:eeks?)?,?\s*
2431 )?
2432 (?:
2433 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2434 )?
2435 T)?
2436 (?:
2437 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2438 )?
2439 (?:
2440 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2441 )?
2442 (?:
2443 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2444 )?Z?$''', s)
2445 if m:
2446 days, hours, mins, secs, ms = m.groups()
2447 else:
2448 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2449 if m:
2450 hours, mins = m.groups()
2451 else:
2452 return None
2453
2454 if ms:
2455 ms = ms.replace(':', '.')
2456 return sum(float(part or 0) * mult for part, mult in (
2457 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
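
# Illustrative examples (result is in seconds, as a float):
#   parse_duration('9:54:00')  # -> 35640.0
#   parse_duration('PT1H30M')  # -> 5400.0
#   parse_duration('3 min')    # -> 180.0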
2458
2459
2460 def prepend_extension(filename, ext, expected_real_ext=None):
2461 name, real_ext = os.path.splitext(filename)
2462 return (
2463 f'{name}.{ext}{real_ext}'
2464 if not expected_real_ext or real_ext[1:] == expected_real_ext
2465 else f'{filename}.{ext}')
2466
2467
2468 def replace_extension(filename, ext, expected_real_ext=None):
2469 name, real_ext = os.path.splitext(filename)
2470 return '{}.{}'.format(
2471 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2472 ext)
2473
2474
2475 def check_executable(exe, args=[]):
2476 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2477 args can be a list of arguments for a short output (like -version) """
2478 try:
2479 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
2480 except OSError:
2481 return False
2482 return exe
2483
2484
2485 def _get_exe_version_output(exe, args, *, to_screen=None):
2486 if to_screen:
2487 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2488 try:
2489 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2490 # SIGTTOU if yt-dlp is run in the background.
2491 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2492 out, _ = Popen(
2493 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
2494 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
2495 except OSError:
2496 return False
2497 if isinstance(out, bytes): # Popen without text mode yields bytes
2498 out = out.decode('ascii', 'ignore')
2499 return out
2500
2501
2502 def detect_exe_version(output, version_re=None, unrecognized='present'):
2503 assert isinstance(output, compat_str)
2504 if version_re is None:
2505 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2506 m = re.search(version_re, output)
2507 if m:
2508 return m.group(1)
2509 else:
2510 return unrecognized
2511
2512
2513 def get_exe_version(exe, args=['--version'],
2514 version_re=None, unrecognized='present'):
2515 """ Returns the version of the specified executable,
2516 or False if the executable is not present """
2517 out = _get_exe_version_output(exe, args)
2518 return detect_exe_version(out, version_re, unrecognized) if out else False
2519
2520
2521 class LazyList(collections.abc.Sequence):
2522 ''' Lazy immutable list from an iterable
2523 Note that slices of a LazyList are lists and not LazyLists'''
2524
2525 class IndexError(IndexError):
2526 pass
2527
2528 def __init__(self, iterable, *, reverse=False, _cache=None):
2529 self.__iterable = iter(iterable)
2530 self.__cache = [] if _cache is None else _cache
2531 self.__reversed = reverse
2532
2533 def __iter__(self):
2534 if self.__reversed:
2535 # We need to consume the entire iterable to iterate in reverse
2536 yield from self.exhaust()
2537 return
2538 yield from self.__cache
2539 for item in self.__iterable:
2540 self.__cache.append(item)
2541 yield item
2542
2543 def __exhaust(self):
2544 self.__cache.extend(self.__iterable)
2545 # Discard the emptied iterable to make it pickle-able
2546 self.__iterable = []
2547 return self.__cache
2548
2549 def exhaust(self):
2550 ''' Evaluate the entire iterable '''
2551 return self.__exhaust()[::-1 if self.__reversed else 1]
2552
2553 @staticmethod
2554 def __reverse_index(x):
2555 return None if x is None else -(x + 1)
2556
2557 def __getitem__(self, idx):
2558 if isinstance(idx, slice):
2559 if self.__reversed:
2560 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
2561 start, stop, step = idx.start, idx.stop, idx.step or 1
2562 elif isinstance(idx, int):
2563 if self.__reversed:
2564 idx = self.__reverse_index(idx)
2565 start, stop, step = idx, idx, 0
2566 else:
2567 raise TypeError('indices must be integers or slices')
2568 if ((start or 0) < 0 or (stop or 0) < 0
2569 or (start is None and step < 0)
2570 or (stop is None and step > 0)):
2571 # We need to consume the entire iterable to be able to slice from the end
2572 # Obviously, never use this with infinite iterables
2573 self.__exhaust()
2574 try:
2575 return self.__cache[idx]
2576 except IndexError as e:
2577 raise self.IndexError(e) from e
2578 n = max(start or 0, stop or 0) - len(self.__cache) + 1
2579 if n > 0:
2580 self.__cache.extend(itertools.islice(self.__iterable, n))
2581 try:
2582 return self.__cache[idx]
2583 except IndexError as e:
2584 raise self.IndexError(e) from e
2585
2586 def __bool__(self):
2587 try:
2588 self[-1] if self.__reversed else self[0]
2589 except self.IndexError:
2590 return False
2591 return True
2592
2593 def __len__(self):
2594 self.__exhaust()
2595 return len(self.__cache)
2596
2597 def __reversed__(self):
2598 return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
2599
2600 def __copy__(self):
2601 return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
2602
2603 def __repr__(self):
2604 # repr and str should mimic a list. So we exhaust the iterable
2605 return repr(self.exhaust())
2606
2607 def __str__(self):
2608 return repr(self.exhaust())
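
# Illustrative usage: items are pulled from the iterable only as needed,
# so even an infinite generator can be sliced:
#   LazyList(itertools.count())[:5]  # -> [0, 1, 2, 3, 4]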
2609
2610
2611 class PagedList:
2612
2613 class IndexError(IndexError):
2614 pass
2615
2616 def __len__(self):
2617 # This is only useful for tests
2618 return len(self.getslice())
2619
2620 def __init__(self, pagefunc, pagesize, use_cache=True):
2621 self._pagefunc = pagefunc
2622 self._pagesize = pagesize
2623 self._pagecount = float('inf')
2624 self._use_cache = use_cache
2625 self._cache = {}
2626
2627 def getpage(self, pagenum):
2628 page_results = self._cache.get(pagenum)
2629 if page_results is None:
2630 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2631 if self._use_cache:
2632 self._cache[pagenum] = page_results
2633 return page_results
2634
2635 def getslice(self, start=0, end=None):
2636 return list(self._getslice(start, end))
2637
2638 def _getslice(self, start, end):
2639 raise NotImplementedError('This method must be implemented by subclasses')
2640
2641 def __getitem__(self, idx):
2642 assert self._use_cache, 'Indexing PagedList requires cache'
2643 if not isinstance(idx, int) or idx < 0:
2644 raise TypeError('indices must be non-negative integers')
2645 entries = self.getslice(idx, idx + 1)
2646 if not entries:
2647 raise self.IndexError()
2648 return entries[0]
2649
2650
2651 class OnDemandPagedList(PagedList):
2652 """Download pages until a page with fewer than the maximum number of results"""
2653
2654 def _getslice(self, start, end):
2655 for pagenum in itertools.count(start // self._pagesize):
2656 firstid = pagenum * self._pagesize
2657 nextfirstid = pagenum * self._pagesize + self._pagesize
2658 if start >= nextfirstid:
2659 continue
2660
2661 startv = (
2662 start % self._pagesize
2663 if firstid <= start < nextfirstid
2664 else 0)
2665 endv = (
2666 ((end - 1) % self._pagesize) + 1
2667 if (end is not None and firstid <= end <= nextfirstid)
2668 else None)
2669
2670 try:
2671 page_results = self.getpage(pagenum)
2672 except Exception:
2673 self._pagecount = pagenum - 1
2674 raise
2675 if startv != 0 or endv is not None:
2676 page_results = page_results[startv:endv]
2677 yield from page_results
2678
2679 # A little optimization - if the current page is not "full", i.e. does
2680 # not contain page_size videos, then we can assume that this page
2681 # is the last one - there are no more ids on further pages -
2682 # so there is no need to query again.
2683 if len(page_results) + startv < self._pagesize:
2684 break
2685
2686 # If we got the whole page, but the next page is not interesting,
2687 # break out early as well
2688 if end == nextfirstid:
2689 break
2690
2691
2692 class InAdvancePagedList(PagedList):
2693 """PagedList with total number of pages known in advance"""
2694
2695 def __init__(self, pagefunc, pagecount, pagesize):
2696 PagedList.__init__(self, pagefunc, pagesize, True)
2697 self._pagecount = pagecount
2698
2699 def _getslice(self, start, end):
2700 start_page = start // self._pagesize
2701 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2702 skip_elems = start - start_page * self._pagesize
2703 only_more = None if end is None else end - start
2704 for pagenum in range(start_page, end_page):
2705 page_results = self.getpage(pagenum)
2706 if skip_elems:
2707 page_results = page_results[skip_elems:]
2708 skip_elems = None
2709 if only_more is not None:
2710 if len(page_results) < only_more:
2711 only_more -= len(page_results)
2712 else:
2713 yield from page_results[:only_more]
2714 break
2715 yield from page_results
2716
2717
2718 def uppercase_escape(s):
2719 unicode_escape = codecs.getdecoder('unicode_escape')
2720 return re.sub(
2721 r'\\U[0-9a-fA-F]{8}',
2722 lambda m: unicode_escape(m.group(0))[0],
2723 s)
2724
2725
2726 def lowercase_escape(s):
2727 unicode_escape = codecs.getdecoder('unicode_escape')
2728 return re.sub(
2729 r'\\u[0-9a-fA-F]{4}',
2730 lambda m: unicode_escape(m.group(0))[0],
2731 s)
2732
2733
2734 def escape_rfc3986(s):
2735 """Escape non-ASCII characters as suggested by RFC 3986"""
2736 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2737
2738
2739 def escape_url(url):
2740 """Escape URL as suggested by RFC 3986"""
2741 url_parsed = compat_urllib_parse_urlparse(url)
2742 return url_parsed._replace(
2743 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2744 path=escape_rfc3986(url_parsed.path),
2745 params=escape_rfc3986(url_parsed.params),
2746 query=escape_rfc3986(url_parsed.query),
2747 fragment=escape_rfc3986(url_parsed.fragment)
2748 ).geturl()
2749
2750
2751 def parse_qs(url):
2752 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2753
2754
2755 def read_batch_urls(batch_fd):
2756 def fixup(url):
2757 if not isinstance(url, compat_str):
2758 url = url.decode('utf-8', 'replace')
2759 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2760 for bom in BOM_UTF8:
2761 if url.startswith(bom):
2762 url = url[len(bom):]
2763 url = url.lstrip()
2764 if not url or url.startswith(('#', ';', ']')):
2765 return False
2766 # "#" cannot be stripped out since it is part of the URI
2767 # However, it can be safely stripped out when it follows whitespace
2768 return re.split(r'\s#', url, 1)[0].rstrip()
2769
2770 with contextlib.closing(batch_fd) as fd:
2771 return [url for url in map(fixup, fd) if url]
2772
2773
2774 def urlencode_postdata(*args, **kargs):
2775 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
2776
2777
2778 def update_url_query(url, query):
2779 if not query:
2780 return url
2781 parsed_url = compat_urlparse.urlparse(url)
2782 qs = compat_parse_qs(parsed_url.query)
2783 qs.update(query)
2784 return compat_urlparse.urlunparse(parsed_url._replace(
2785 query=compat_urllib_parse_urlencode(qs, True)))
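
# For example (illustrative):
#   update_url_query('http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})
#   # -> 'http://example.com/path?quality=HD&format=mp4'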
2786
2787
2788 def update_Request(req, url=None, data=None, headers={}, query={}):
2789 req_headers = req.headers.copy()
2790 req_headers.update(headers)
2791 req_data = data or req.data
2792 req_url = update_url_query(url or req.get_full_url(), query)
2793 req_get_method = req.get_method()
2794 if req_get_method == 'HEAD':
2795 req_type = HEADRequest
2796 elif req_get_method == 'PUT':
2797 req_type = PUTRequest
2798 else:
2799 req_type = compat_urllib_request.Request
2800 new_req = req_type(
2801 req_url, data=req_data, headers=req_headers,
2802 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2803 if hasattr(req, 'timeout'):
2804 new_req.timeout = req.timeout
2805 return new_req
2806
2807
2808 def _multipart_encode_impl(data, boundary):
2809 content_type = 'multipart/form-data; boundary=%s' % boundary
2810
2811 out = b''
2812 for k, v in data.items():
2813 out += b'--' + boundary.encode('ascii') + b'\r\n'
2814 if isinstance(k, compat_str):
2815 k = k.encode('utf-8')
2816 if isinstance(v, compat_str):
2817 v = v.encode('utf-8')
2818 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2819 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2820 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2821 if boundary.encode('ascii') in content:
2822 raise ValueError('Boundary overlaps with data')
2823 out += content
2824
2825 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2826
2827 return out, content_type
2828
2829
2830 def multipart_encode(data, boundary=None):
2831 '''
2832 Encode a dict to RFC 7578-compliant form-data
2833
2834 data:
2835 A dict where keys and values can be either Unicode or bytes-like
2836 objects.
2837 boundary:
2838 If specified, a Unicode object used as the boundary. Otherwise
2839 a random boundary is generated.
2840
2841 Reference: https://tools.ietf.org/html/rfc7578
2842 '''
2843 has_specified_boundary = boundary is not None
2844
2845 while True:
2846 if boundary is None:
2847 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
2848
2849 try:
2850 out, content_type = _multipart_encode_impl(data, boundary)
2851 break
2852 except ValueError:
2853 if has_specified_boundary:
2854 raise
2855 boundary = None
2856
2857 return out, content_type
2858
2859
2860 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
2861 for val in map(d.get, variadic(key_or_keys)):
2862 if val is not None and (val or not skip_false_values):
2863 return val
2864 return default
2865
2866
2867 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
2868 for f in funcs:
2869 try:
2870 val = f(*args, **kwargs)
2871 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
2872 pass
2873 else:
2874 if expected_type is None or isinstance(val, expected_type):
2875 return val
2876
2877
2878 def try_get(src, getter, expected_type=None):
2879 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
2880
2881
2882 def filter_dict(dct, cndn=lambda _, v: v is not None):
2883 return {k: v for k, v in dct.items() if cndn(k, v)}
2884
2885
2886 def merge_dicts(*dicts):
2887 merged = {}
2888 for a_dict in dicts:
2889 for k, v in a_dict.items():
2890 if (v is not None and k not in merged
2891 or isinstance(v, str) and merged[k] == ''):
2892 merged[k] = v
2893 return merged
2894
2895
2896 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2897 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2898
2899
2900 US_RATINGS = {
2901 'G': 0,
2902 'PG': 10,
2903 'PG-13': 13,
2904 'R': 16,
2905 'NC': 18,
2906 }
2907
2908
2909 TV_PARENTAL_GUIDELINES = {
2910 'TV-Y': 0,
2911 'TV-Y7': 7,
2912 'TV-G': 0,
2913 'TV-PG': 0,
2914 'TV-14': 14,
2915 'TV-MA': 17,
2916 }
2917
2918
2919 def parse_age_limit(s):
2920 # isinstance(False, int) is True. So type() must be used instead
2921 if type(s) is int:
2922 return s if 0 <= s <= 21 else None
2923 elif not isinstance(s, str):
2924 return None
2925 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
2926 if m:
2927 return int(m.group('age'))
2928 s = s.upper()
2929 if s in US_RATINGS:
2930 return US_RATINGS[s]
2931 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
2932 if m:
2933 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
2934 return None
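
# Illustrative examples:
#   parse_age_limit('18+')    # -> 18
#   parse_age_limit('PG-13')  # -> 13
#   parse_age_limit('TV-MA')  # -> 17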
2935
2936
2937 def strip_jsonp(code):
2938 return re.sub(
2939 r'''(?sx)^
2940 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2941 (?:\s*&&\s*(?P=func_name))?
2942 \s*\(\s*(?P<callback_data>.*)\);?
2943 \s*?(?://[^\n]*)*$''',
2944 r'\g<callback_data>', code)
2945
2946
2947 def js_to_json(code, vars={}):
2948 # vars is a dict of var, val pairs to substitute
2949 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
2950 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
2951 INTEGER_TABLE = (
2952 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
2953 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
2954 )
2955
2956 def fix_kv(m):
2957 v = m.group(0)
2958 if v in ('true', 'false', 'null'):
2959 return v
2960 elif v in ('undefined', 'void 0'):
2961 return 'null'
2962 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
2963 return ""
2964
2965 if v[0] in ("'", '"'):
2966 v = re.sub(r'(?s)\\.|"', lambda m: {
2967 '"': '\\"',
2968 "\\'": "'",
2969 '\\\n': '',
2970 '\\x': '\\u00',
2971 }.get(m.group(0), m.group(0)), v[1:-1])
2972 else:
2973 for regex, base in INTEGER_TABLE:
2974 im = re.match(regex, v)
2975 if im:
2976 i = int(im.group(1), base)
2977 return '"%d":' % i if v.endswith(':') else '%d' % i
2978
2979 if v in vars:
2980 return vars[v]
2981
2982 return '"%s"' % v
2983
2984 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
2985
2986 return re.sub(r'''(?sx)
2987 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
2988 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
2989 {comment}|,(?={skip}[\]}}])|
2990 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
2991 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
2992 [0-9]+(?={skip}:)|
2993 !+
2994 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
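
# For example (illustrative):
#   js_to_json("{abc: 1, 'def': undefined}")  # -> '{"abc": 1, "def": null}'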
2995
2996
2997 def qualities(quality_ids):
2998 """ Get a numeric quality value out of a list of possible values """
2999 def q(qid):
3000 try:
3001 return quality_ids.index(qid)
3002 except ValueError:
3003 return -1
3004 return q
3005
3006
3007 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
3008
3009
3010 DEFAULT_OUTTMPL = {
3011 'default': '%(title)s [%(id)s].%(ext)s',
3012 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3013 }
3014 OUTTMPL_TYPES = {
3015 'chapter': None,
3016 'subtitle': None,
3017 'thumbnail': None,
3018 'description': 'description',
3019 'annotation': 'annotations.xml',
3020 'infojson': 'info.json',
3021 'link': None,
3022 'pl_video': None,
3023 'pl_thumbnail': None,
3024 'pl_description': 'description',
3025 'pl_infojson': 'info.json',
3026 }
3027
3028 # As of [1] format syntax is:
3029 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3030 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3031 STR_FORMAT_RE_TMPL = r'''(?x)
3032 (?<!%)(?P<prefix>(?:%%)*)
3033 %
3034 (?P<has_key>\((?P<key>{0})\))?
3035 (?P<format>
3036 (?P<conversion>[#0\-+ ]+)?
3037 (?P<min_width>\d+)?
3038 (?P<precision>\.\d+)?
3039 (?P<len_mod>[hlL])? # unused in python
3040 {1} # conversion type
3041 )
3042 '''
3043
3044
3045 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3046
3047
3048 def limit_length(s, length):
3049 """ Add ellipses to overly long strings """
3050 if s is None:
3051 return None
3052 ELLIPSES = '...'
3053 if len(s) > length:
3054 return s[:length - len(ELLIPSES)] + ELLIPSES
3055 return s
3056
3057
3058 def version_tuple(v):
3059 return tuple(int(e) for e in re.split(r'[-.]', v))
3060
3061
3062 def is_outdated_version(version, limit, assume_new=True):
3063 if not version:
3064 return not assume_new
3065 try:
3066 return version_tuple(version) < version_tuple(limit)
3067 except ValueError:
3068 return not assume_new
3069
3070
3071 def ytdl_is_updateable():
3072 """ Returns whether yt-dlp can be updated with -U """
3073
3074 from .update import is_non_updateable
3075
3076 return not is_non_updateable()
3077
3078
3079 def args_to_str(args):
3080 # Get a short string representation for a subprocess command
3081 return ' '.join(compat_shlex_quote(a) for a in args)
3082
3083
3084 def error_to_compat_str(err):
3085 return str(err)
3086
3087
3088 def error_to_str(err):
3089 return f'{type(err).__name__}: {err}'
3090
3091
3092 def mimetype2ext(mt):
3093 if mt is None:
3094 return None
3095
3096 mt, _, params = mt.partition(';')
3097 mt = mt.strip()
3098
3099 FULL_MAP = {
3100 'audio/mp4': 'm4a',
3101 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
3102 # it's the most popular one
3103 'audio/mpeg': 'mp3',
3104 'audio/x-wav': 'wav',
3105 'audio/wav': 'wav',
3106 'audio/wave': 'wav',
3107 }
3108
3109 ext = FULL_MAP.get(mt)
3110 if ext is not None:
3111 return ext
3112
3113 SUBTYPE_MAP = {
3114 '3gpp': '3gp',
3115 'smptett+xml': 'tt',
3116 'ttaf+xml': 'dfxp',
3117 'ttml+xml': 'ttml',
3118 'x-flv': 'flv',
3119 'x-mp4-fragmented': 'mp4',
3120 'x-ms-sami': 'sami',
3121 'x-ms-wmv': 'wmv',
3122 'mpegurl': 'm3u8',
3123 'x-mpegurl': 'm3u8',
3124 'vnd.apple.mpegurl': 'm3u8',
3125 'dash+xml': 'mpd',
3126 'f4m+xml': 'f4m',
3127 'hds+xml': 'f4m',
3128 'vnd.ms-sstr+xml': 'ism',
3129 'quicktime': 'mov',
3130 'mp2t': 'ts',
3131 'x-wav': 'wav',
3132 'filmstrip+json': 'fs',
3133 'svg+xml': 'svg',
3134 }
3135
3136 _, _, subtype = mt.rpartition('/')
3137 ext = SUBTYPE_MAP.get(subtype.lower())
3138 if ext is not None:
3139 return ext
3140
3141 SUFFIX_MAP = {
3142 'json': 'json',
3143 'xml': 'xml',
3144 'zip': 'zip',
3145 'gzip': 'gz',
3146 }
3147
3148 _, _, suffix = subtype.partition('+')
3149 ext = SUFFIX_MAP.get(suffix)
3150 if ext is not None:
3151 return ext
3152
3153 return subtype.replace('+', '.')
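
# Illustrative examples:
#   mimetype2ext('audio/mp4')              # -> 'm4a' (from FULL_MAP)
#   mimetype2ext('application/x-mpegurl')  # -> 'm3u8' (from SUBTYPE_MAP)
#   mimetype2ext('application/dash+xml')   # -> 'mpd'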
3154
3155
3156 def ext2mimetype(ext_or_url):
3157 if not ext_or_url:
3158 return None
3159 if '.' not in ext_or_url:
3160 ext_or_url = f'file.{ext_or_url}'
3161 return mimetypes.guess_type(ext_or_url)[0]
3162
3163
3164 def parse_codecs(codecs_str):
3165 # http://tools.ietf.org/html/rfc6381
3166 if not codecs_str:
3167 return {}
3168 split_codecs = list(filter(None, map(
3169 str.strip, codecs_str.strip().strip(',').split(','))))
3170 vcodec, acodec, tcodec, hdr = None, None, None, None
3171 for full_codec in split_codecs:
3172 parts = full_codec.split('.')
3173 codec = parts[0].replace('0', '')
3174 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3175 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3176 if not vcodec:
3177 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3178 if codec in ('dvh1', 'dvhe'):
3179 hdr = 'DV'
3180 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3181 hdr = 'HDR10'
3182 elif full_codec.replace('0', '').startswith('vp9.2'):
3183 hdr = 'HDR10'
3184 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3185 if not acodec:
3186 acodec = full_codec
3187 elif codec in ('stpp', 'wvtt',):
3188 if not tcodec:
3189 tcodec = full_codec
3190 else:
3191 write_string(f'WARNING: Unknown codec {full_codec}\n')
3192 if vcodec or acodec or tcodec:
3193 return {
3194 'vcodec': vcodec or 'none',
3195 'acodec': acodec or 'none',
3196 'dynamic_range': hdr,
3197 **({'tcodec': tcodec} if tcodec is not None else {}),
3198 }
3199 elif len(split_codecs) == 2:
3200 return {
3201 'vcodec': split_codecs[0],
3202 'acodec': split_codecs[1],
3203 }
3204 return {}
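
# For example (illustrative):
#   parse_codecs('avc1.42E01E, mp4a.40.2')
#   # -> {'vcodec': 'avc1.42E01E', 'acodec': 'mp4a.40.2', 'dynamic_range': None}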
3205
3206
3207 def urlhandle_detect_ext(url_handle):
3208 getheader = url_handle.headers.get
3209
3210 cd = getheader('Content-Disposition')
3211 if cd:
3212 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3213 if m:
3214 e = determine_ext(m.group('filename'), default_ext=None)
3215 if e:
3216 return e
3217
3218 return mimetype2ext(getheader('Content-Type'))
3219
3220
3221 def encode_data_uri(data, mime_type):
3222 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3223
3224
3225 def age_restricted(content_limit, age_limit):
3226 """ Returns True iff the content should be blocked """
3227
3228 if age_limit is None: # No limit set
3229 return False
3230 if content_limit is None:
3231 return False # Content available for everyone
3232 return age_limit < content_limit
3233
3234
3235 def is_html(first_bytes):
3236 """ Detect whether a file contains HTML by examining its first bytes. """
3237
3238 BOMS = [
3239 (b'\xef\xbb\xbf', 'utf-8'),
3240 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3241 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3242 (b'\xff\xfe', 'utf-16-le'),
3243 (b'\xfe\xff', 'utf-16-be'),
3244 ]
3245 for bom, enc in BOMS:
3246 if first_bytes.startswith(bom):
3247 s = first_bytes[len(bom):].decode(enc, 'replace')
3248 break
3249 else:
3250 s = first_bytes.decode('utf-8', 'replace')
3251
3252 return re.match(r'^\s*<', s)
3253
3254
3255 def determine_protocol(info_dict):
3256 protocol = info_dict.get('protocol')
3257 if protocol is not None:
3258 return protocol
3259
3260 url = sanitize_url(info_dict['url'])
3261 if url.startswith('rtmp'):
3262 return 'rtmp'
3263 elif url.startswith('mms'):
3264 return 'mms'
3265 elif url.startswith('rtsp'):
3266 return 'rtsp'
3267
3268 ext = determine_ext(url)
3269 if ext == 'm3u8':
3270 return 'm3u8'
3271 elif ext == 'f4m':
3272 return 'f4m'
3273
3274 return compat_urllib_parse_urlparse(url).scheme
3275
3276
3277 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3278 """ Render a list of rows, each as a list of values.
3279 Text after a \t will be right aligned """
3280 def width(string):
3281 return len(remove_terminal_sequences(string).replace('\t', ''))
3282
3283 def get_max_lens(table):
3284 return [max(width(str(v)) for v in col) for col in zip(*table)]
3285
3286 def filter_using_list(row, filterArray):
3287 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3288
3289 max_lens = get_max_lens(data) if hide_empty else []
3290 header_row = filter_using_list(header_row, max_lens)
3291 data = [filter_using_list(row, max_lens) for row in data]
3292
3293 table = [header_row] + data
3294 max_lens = get_max_lens(table)
3295 extra_gap += 1
3296 if delim:
3297 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3298 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3299 for row in table:
3300 for pos, text in enumerate(map(str, row)):
3301 if '\t' in text:
3302 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3303 else:
3304 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3305 ret = '\n'.join(''.join(row).rstrip() for row in table)
3306 return ret
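
# For example (illustrative):
#   render_table(['id', 'name'], [['1', 'foo'], ['22', 'bar']])
#   # -> 'id name\n1  foo\n22 bar'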
3307
3308
3309 def _match_one(filter_part, dct, incomplete):
3310 # TODO: Generalize code with YoutubeDL._build_format_filter
3311 STRING_OPERATORS = {
3312 '*=': operator.contains,
3313 '^=': lambda attr, value: attr.startswith(value),
3314 '$=': lambda attr, value: attr.endswith(value),
3315 '~=': lambda attr, value: re.search(value, attr),
3316 }
3317 COMPARISON_OPERATORS = {
3318 **STRING_OPERATORS,
3319 '<=': operator.le, # "<=" must be defined above "<"
3320 '<': operator.lt,
3321 '>=': operator.ge,
3322 '>': operator.gt,
3323 '=': operator.eq,
3324 }
3325
3326 if isinstance(incomplete, bool):
3327 is_incomplete = lambda _: incomplete
3328 else:
3329 is_incomplete = lambda k: k in incomplete
3330
3331 operator_rex = re.compile(r'''(?x)\s*
3332 (?P<key>[a-z_]+)
3333 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3334 (?:
3335 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3336 (?P<strval>.+?)
3337 )
3338 \s*$
3339 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3340 m = operator_rex.search(filter_part)
3341 if m:
3342 m = m.groupdict()
3343 unnegated_op = COMPARISON_OPERATORS[m['op']]
3344 if m['negation']:
3345 op = lambda attr, value: not unnegated_op(attr, value)
3346 else:
3347 op = unnegated_op
3348 comparison_value = m['quotedstrval'] or m['strval']  # the pattern above defines no 'intval' group
3349 if m['quote']:
3350 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3351 actual_value = dct.get(m['key'])
3352 numeric_comparison = None
3353 if isinstance(actual_value, (int, float)):
3354 # If the original field is a string and the matching comparison value is
3355 # a number, we should respect the origin of the original field
3356 # and process the comparison value as a string (see
3357 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3358 try:
3359 numeric_comparison = int(comparison_value)
3360 except ValueError:
3361 numeric_comparison = parse_filesize(comparison_value)
3362 if numeric_comparison is None:
3363 numeric_comparison = parse_filesize(f'{comparison_value}B')
3364 if numeric_comparison is None:
3365 numeric_comparison = parse_duration(comparison_value)
3366 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3367 raise ValueError('Operator %s only supports string values!' % m['op'])
3368 if actual_value is None:
3369 return is_incomplete(m['key']) or m['none_inclusive']
3370 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3371
3372 UNARY_OPERATORS = {
3373 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3374 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3375 }
3376 operator_rex = re.compile(r'''(?x)\s*
3377 (?P<op>%s)\s*(?P<key>[a-z_]+)
3378 \s*$
3379 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3380 m = operator_rex.search(filter_part)
3381 if m:
3382 op = UNARY_OPERATORS[m.group('op')]
3383 actual_value = dct.get(m.group('key'))
3384 if is_incomplete(m.group('key')) and actual_value is None:
3385 return True
3386 return op(actual_value)
3387
3388 raise ValueError('Invalid filter part %r' % filter_part)
3389
3390
3391 def match_str(filter_str, dct, incomplete=False):
3392 """ Filter a dictionary with a simple string syntax.
3393 @returns Whether the filter passes
3394 @param incomplete Set of keys that is expected to be missing from dct.
3395 Can be True/False to indicate all/none of the keys may be missing.
3396 All conditions on incomplete keys pass if the key is missing
3397 """
3398 return all(
3399 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3400 for filter_part in re.split(r'(?<!\\)&', filter_str))
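
# Illustrative examples ('&' separates conditions that must all hold):
#   match_str('like_count > 100 & title~=yt-dlp',
#             {'like_count': 190, 'title': 'about yt-dlp'})  # -> True
#   match_str('duration < 30', {'duration': 30})             # -> False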
3401
3402
3403 def match_filter_func(filters):
3404 if not filters:
3405 return None
3406 filters = variadic(filters)
3407
3408 def _match_func(info_dict, *args, **kwargs):
3409 if any(match_str(f, info_dict, *args, **kwargs) for f in filters):
3410 return None
3411 else:
3412 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3413 filter_str = ') | ('.join(map(str.strip, filters))
3414 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3415 return _match_func
3416
3417
3418 def parse_dfxp_time_expr(time_expr):
3419 if not time_expr:
3420 return
3421
3422 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
3423 if mobj:
3424 return float(mobj.group('time_offset'))
3425
3426 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3427 if mobj:
3428 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3429
3430
3431 def srt_subtitles_timecode(seconds):
3432 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3433
3434
3435 def ass_subtitles_timecode(seconds):
3436 time = timetuple_from_msec(seconds * 1000)
3437 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3438
3439
3440 def dfxp2srt(dfxp_data):
3441 '''
3442 @param dfxp_data A bytes-like object containing DFXP data
3443 @returns A unicode object containing converted SRT data
3444 '''
3445 LEGACY_NAMESPACES = (
3446 (b'http://www.w3.org/ns/ttml', [
3447 b'http://www.w3.org/2004/11/ttaf1',
3448 b'http://www.w3.org/2006/04/ttaf1',
3449 b'http://www.w3.org/2006/10/ttaf1',
3450 ]),
3451 (b'http://www.w3.org/ns/ttml#styling', [
3452 b'http://www.w3.org/ns/ttml#style',
3453 ]),
3454 )
3455
3456 SUPPORTED_STYLING = [
3457 'color',
3458 'fontFamily',
3459 'fontSize',
3460 'fontStyle',
3461 'fontWeight',
3462 'textDecoration'
3463 ]
3464
3465 _x = functools.partial(xpath_with_ns, ns_map={
3466 'xml': 'http://www.w3.org/XML/1998/namespace',
3467 'ttml': 'http://www.w3.org/ns/ttml',
3468 'tts': 'http://www.w3.org/ns/ttml#styling',
3469 })
3470
3471 styles = {}
3472 default_style = {}
3473
3474 class TTMLPElementParser:
3475 _out = ''
3476 _unclosed_elements = []
3477 _applied_styles = []
3478
3479 def start(self, tag, attrib):
3480 if tag in (_x('ttml:br'), 'br'):
3481 self._out += '\n'
3482 else:
3483 unclosed_elements = []
3484 style = {}
3485 element_style_id = attrib.get('style')
3486 if default_style:
3487 style.update(default_style)
3488 if element_style_id:
3489 style.update(styles.get(element_style_id, {}))
3490 for prop in SUPPORTED_STYLING:
3491 prop_val = attrib.get(_x('tts:' + prop))
3492 if prop_val:
3493 style[prop] = prop_val
3494 if style:
3495 font = ''
3496 for k, v in sorted(style.items()):
3497 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3498 continue
3499 if k == 'color':
3500 font += ' color="%s"' % v
3501 elif k == 'fontSize':
3502 font += ' size="%s"' % v
3503 elif k == 'fontFamily':
3504 font += ' face="%s"' % v
3505 elif k == 'fontWeight' and v == 'bold':
3506 self._out += '<b>'
3507 unclosed_elements.append('b')
3508 elif k == 'fontStyle' and v == 'italic':
3509 self._out += '<i>'
3510 unclosed_elements.append('i')
3511 elif k == 'textDecoration' and v == 'underline':
3512 self._out += '<u>'
3513 unclosed_elements.append('u')
3514 if font:
3515 self._out += '<font' + font + '>'
3516 unclosed_elements.append('font')
3517 applied_style = {}
3518 if self._applied_styles:
3519 applied_style.update(self._applied_styles[-1])
3520 applied_style.update(style)
3521 self._applied_styles.append(applied_style)
3522 self._unclosed_elements.append(unclosed_elements)
3523
3524 def end(self, tag):
3525 if tag not in (_x('ttml:br'), 'br'):
3526 unclosed_elements = self._unclosed_elements.pop()
3527 for element in reversed(unclosed_elements):
3528 self._out += '</%s>' % element
3529 if unclosed_elements and self._applied_styles:
3530 self._applied_styles.pop()
3531
3532 def data(self, data):
3533 self._out += data
3534
3535 def close(self):
3536 return self._out.strip()
3537
3538 def parse_node(node):
3539 target = TTMLPElementParser()
3540 parser = xml.etree.ElementTree.XMLParser(target=target)
3541 parser.feed(xml.etree.ElementTree.tostring(node))
3542 return parser.close()
3543
3544 for k, v in LEGACY_NAMESPACES:
3545 for ns in v:
3546 dfxp_data = dfxp_data.replace(ns, k)
3547
3548 dfxp = compat_etree_fromstring(dfxp_data)
3549 out = []
3550 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3551
3552 if not paras:
3553 raise ValueError('Invalid dfxp/TTML subtitle')
3554
3555 repeat = False
3556 while True:
3557 for style in dfxp.findall(_x('.//ttml:style')):
3558 style_id = style.get('id') or style.get(_x('xml:id'))
3559 if not style_id:
3560 continue
3561 parent_style_id = style.get('style')
3562 if parent_style_id:
3563 if parent_style_id not in styles:
3564 repeat = True
3565 continue
3566 styles[style_id] = styles[parent_style_id].copy()
3567 for prop in SUPPORTED_STYLING:
3568 prop_val = style.get(_x('tts:' + prop))
3569 if prop_val:
3570 styles.setdefault(style_id, {})[prop] = prop_val
3571 if repeat:
3572 repeat = False
3573 else:
3574 break
3575
3576 for p in ('body', 'div'):
3577 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3578 if ele is None:
3579 continue
3580 style = styles.get(ele.get('style'))
3581 if not style:
3582 continue
3583 default_style.update(style)
3584
3585 for para, index in zip(paras, itertools.count(1)):
3586 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3587 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3588 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3589 if begin_time is None:
3590 continue
3591 if not end_time:
3592 if not dur:
3593 continue
3594 end_time = begin_time + dur
3595 out.append('%d\n%s --> %s\n%s\n\n' % (
3596 index,
3597 srt_subtitles_timecode(begin_time),
3598 srt_subtitles_timecode(end_time),
3599 parse_node(para)))
3600
3601 return ''.join(out)
3602
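# Illustrative sketch (not from the original source): feeding a minimal TTML
# document through the enclosing DFXP/TTML-to-SRT converter (dfxp2srt). The
# input bytes are a made-up example.
#   >>> dfxp2srt(b'<tt xmlns="http://www.w3.org/ns/ttml"><body>'
#   ...          b'<p begin="0s" end="1.5s">Hello</p></body></tt>')
#   '1\n00:00:00,000 --> 00:00:01,500\nHello\n\n'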
3603
3604 def cli_option(params, command_option, param):
3605 param = params.get(param)
3606 if param is not None: # stringify any non-None value, so e.g. 0 is not passed through as an int
3607 param = compat_str(param)
3608 return [command_option, param] if param is not None else []
3609
3610
3611 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3612 param = params.get(param)
3613 if param is None:
3614 return []
3615 assert isinstance(param, bool)
3616 if separator:
3617 return [command_option + separator + (true_value if param else false_value)]
3618 return [command_option, true_value if param else false_value]
3619
3620
3621 def cli_valueless_option(params, command_option, param, expected_value=True):
3622 param = params.get(param)
3623 return [command_option] if param == expected_value else []
3624
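# Illustrative sketches (hypothetical params dicts) of the three cli_* helpers:
#   >>> cli_option({'proxy': 'http://127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', 'http://127.0.0.1:3128']
#   >>> cli_bool_option({'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true')
#   ['--check-certificate', 'false']
#   >>> cli_valueless_option({'downloadarchive': True}, '--no-archive', 'downloadarchive', False)
#   []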
3625
3626 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3627 if isinstance(argdict, (list, tuple)): # for backward compatibility
3628 if use_compat:
3629 return argdict
3630 else:
3631 argdict = None
3632 if argdict is None:
3633 return default
3634 assert isinstance(argdict, dict)
3635
3636 assert isinstance(keys, (list, tuple))
3637 for key_list in keys:
3638 arg_list = list(filter(
3639 lambda x: x is not None,
3640 [argdict.get(key.lower()) for key in variadic(key_list)]))
3641 if arg_list:
3642 return [arg for args in arg_list for arg in args]
3643 return default
3644
3645
3646 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3647 main_key, exe = main_key.lower(), exe.lower()
3648 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3649 keys = [f'{root_key}{k}' for k in (keys or [''])]
3650 if root_key in keys:
3651 if main_key != exe:
3652 keys.append((main_key, exe))
3653 keys.append('default')
3654 else:
3655 use_compat = False
3656 return cli_configuration_args(argdict, keys, default, use_compat)
3657
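# Illustrative sketch (hypothetical argdict) of how the key list is resolved:
#   >>> _configuration_args('downloader', {'default': ['-v']}, 'aria2c')
#   ['-v']
# Here root_key is 'downloader+aria2c'; since neither it nor the
# ('downloader', 'aria2c') pair is present in argdict, the 'default' entry wins.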
3658
3659 class ISO639Utils:
3660 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3661 _lang_map = {
3662 'aa': 'aar',
3663 'ab': 'abk',
3664 'ae': 'ave',
3665 'af': 'afr',
3666 'ak': 'aka',
3667 'am': 'amh',
3668 'an': 'arg',
3669 'ar': 'ara',
3670 'as': 'asm',
3671 'av': 'ava',
3672 'ay': 'aym',
3673 'az': 'aze',
3674 'ba': 'bak',
3675 'be': 'bel',
3676 'bg': 'bul',
3677 'bh': 'bih',
3678 'bi': 'bis',
3679 'bm': 'bam',
3680 'bn': 'ben',
3681 'bo': 'bod',
3682 'br': 'bre',
3683 'bs': 'bos',
3684 'ca': 'cat',
3685 'ce': 'che',
3686 'ch': 'cha',
3687 'co': 'cos',
3688 'cr': 'cre',
3689 'cs': 'ces',
3690 'cu': 'chu',
3691 'cv': 'chv',
3692 'cy': 'cym',
3693 'da': 'dan',
3694 'de': 'deu',
3695 'dv': 'div',
3696 'dz': 'dzo',
3697 'ee': 'ewe',
3698 'el': 'ell',
3699 'en': 'eng',
3700 'eo': 'epo',
3701 'es': 'spa',
3702 'et': 'est',
3703 'eu': 'eus',
3704 'fa': 'fas',
3705 'ff': 'ful',
3706 'fi': 'fin',
3707 'fj': 'fij',
3708 'fo': 'fao',
3709 'fr': 'fra',
3710 'fy': 'fry',
3711 'ga': 'gle',
3712 'gd': 'gla',
3713 'gl': 'glg',
3714 'gn': 'grn',
3715 'gu': 'guj',
3716 'gv': 'glv',
3717 'ha': 'hau',
3718 'he': 'heb',
3719 'iw': 'heb', # Replaced by he in 1989 revision
3720 'hi': 'hin',
3721 'ho': 'hmo',
3722 'hr': 'hrv',
3723 'ht': 'hat',
3724 'hu': 'hun',
3725 'hy': 'hye',
3726 'hz': 'her',
3727 'ia': 'ina',
3728 'id': 'ind',
3729 'in': 'ind', # Replaced by id in 1989 revision
3730 'ie': 'ile',
3731 'ig': 'ibo',
3732 'ii': 'iii',
3733 'ik': 'ipk',
3734 'io': 'ido',
3735 'is': 'isl',
3736 'it': 'ita',
3737 'iu': 'iku',
3738 'ja': 'jpn',
3739 'jv': 'jav',
3740 'ka': 'kat',
3741 'kg': 'kon',
3742 'ki': 'kik',
3743 'kj': 'kua',
3744 'kk': 'kaz',
3745 'kl': 'kal',
3746 'km': 'khm',
3747 'kn': 'kan',
3748 'ko': 'kor',
3749 'kr': 'kau',
3750 'ks': 'kas',
3751 'ku': 'kur',
3752 'kv': 'kom',
3753 'kw': 'cor',
3754 'ky': 'kir',
3755 'la': 'lat',
3756 'lb': 'ltz',
3757 'lg': 'lug',
3758 'li': 'lim',
3759 'ln': 'lin',
3760 'lo': 'lao',
3761 'lt': 'lit',
3762 'lu': 'lub',
3763 'lv': 'lav',
3764 'mg': 'mlg',
3765 'mh': 'mah',
3766 'mi': 'mri',
3767 'mk': 'mkd',
3768 'ml': 'mal',
3769 'mn': 'mon',
3770 'mr': 'mar',
3771 'ms': 'msa',
3772 'mt': 'mlt',
3773 'my': 'mya',
3774 'na': 'nau',
3775 'nb': 'nob',
3776 'nd': 'nde',
3777 'ne': 'nep',
3778 'ng': 'ndo',
3779 'nl': 'nld',
3780 'nn': 'nno',
3781 'no': 'nor',
3782 'nr': 'nbl',
3783 'nv': 'nav',
3784 'ny': 'nya',
3785 'oc': 'oci',
3786 'oj': 'oji',
3787 'om': 'orm',
3788 'or': 'ori',
3789 'os': 'oss',
3790 'pa': 'pan',
3791 'pi': 'pli',
3792 'pl': 'pol',
3793 'ps': 'pus',
3794 'pt': 'por',
3795 'qu': 'que',
3796 'rm': 'roh',
3797 'rn': 'run',
3798 'ro': 'ron',
3799 'ru': 'rus',
3800 'rw': 'kin',
3801 'sa': 'san',
3802 'sc': 'srd',
3803 'sd': 'snd',
3804 'se': 'sme',
3805 'sg': 'sag',
3806 'si': 'sin',
3807 'sk': 'slk',
3808 'sl': 'slv',
3809 'sm': 'smo',
3810 'sn': 'sna',
3811 'so': 'som',
3812 'sq': 'sqi',
3813 'sr': 'srp',
3814 'ss': 'ssw',
3815 'st': 'sot',
3816 'su': 'sun',
3817 'sv': 'swe',
3818 'sw': 'swa',
3819 'ta': 'tam',
3820 'te': 'tel',
3821 'tg': 'tgk',
3822 'th': 'tha',
3823 'ti': 'tir',
3824 'tk': 'tuk',
3825 'tl': 'tgl',
3826 'tn': 'tsn',
3827 'to': 'ton',
3828 'tr': 'tur',
3829 'ts': 'tso',
3830 'tt': 'tat',
3831 'tw': 'twi',
3832 'ty': 'tah',
3833 'ug': 'uig',
3834 'uk': 'ukr',
3835 'ur': 'urd',
3836 'uz': 'uzb',
3837 've': 'ven',
3838 'vi': 'vie',
3839 'vo': 'vol',
3840 'wa': 'wln',
3841 'wo': 'wol',
3842 'xh': 'xho',
3843 'yi': 'yid',
3844 'ji': 'yid', # Replaced by yi in 1989 revision
3845 'yo': 'yor',
3846 'za': 'zha',
3847 'zh': 'zho',
3848 'zu': 'zul',
3849 }
3850
3851 @classmethod
3852 def short2long(cls, code):
3853 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3854 return cls._lang_map.get(code[:2])
3855
3856 @classmethod
3857 def long2short(cls, code):
3858 """Convert language code from ISO 639-2/T to ISO 639-1"""
3859 for short_name, long_name in cls._lang_map.items():
3860 if long_name == code:
3861 return short_name
3862
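# Illustrative round trip:
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'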
3863
3864 class ISO3166Utils:
3865 # From http://data.okfn.org/data/core/country-list
3866 _country_map = {
3867 'AF': 'Afghanistan',
3868 'AX': 'Åland Islands',
3869 'AL': 'Albania',
3870 'DZ': 'Algeria',
3871 'AS': 'American Samoa',
3872 'AD': 'Andorra',
3873 'AO': 'Angola',
3874 'AI': 'Anguilla',
3875 'AQ': 'Antarctica',
3876 'AG': 'Antigua and Barbuda',
3877 'AR': 'Argentina',
3878 'AM': 'Armenia',
3879 'AW': 'Aruba',
3880 'AU': 'Australia',
3881 'AT': 'Austria',
3882 'AZ': 'Azerbaijan',
3883 'BS': 'Bahamas',
3884 'BH': 'Bahrain',
3885 'BD': 'Bangladesh',
3886 'BB': 'Barbados',
3887 'BY': 'Belarus',
3888 'BE': 'Belgium',
3889 'BZ': 'Belize',
3890 'BJ': 'Benin',
3891 'BM': 'Bermuda',
3892 'BT': 'Bhutan',
3893 'BO': 'Bolivia, Plurinational State of',
3894 'BQ': 'Bonaire, Sint Eustatius and Saba',
3895 'BA': 'Bosnia and Herzegovina',
3896 'BW': 'Botswana',
3897 'BV': 'Bouvet Island',
3898 'BR': 'Brazil',
3899 'IO': 'British Indian Ocean Territory',
3900 'BN': 'Brunei Darussalam',
3901 'BG': 'Bulgaria',
3902 'BF': 'Burkina Faso',
3903 'BI': 'Burundi',
3904 'KH': 'Cambodia',
3905 'CM': 'Cameroon',
3906 'CA': 'Canada',
3907 'CV': 'Cape Verde',
3908 'KY': 'Cayman Islands',
3909 'CF': 'Central African Republic',
3910 'TD': 'Chad',
3911 'CL': 'Chile',
3912 'CN': 'China',
3913 'CX': 'Christmas Island',
3914 'CC': 'Cocos (Keeling) Islands',
3915 'CO': 'Colombia',
3916 'KM': 'Comoros',
3917 'CG': 'Congo',
3918 'CD': 'Congo, the Democratic Republic of the',
3919 'CK': 'Cook Islands',
3920 'CR': 'Costa Rica',
3921 'CI': 'Côte d\'Ivoire',
3922 'HR': 'Croatia',
3923 'CU': 'Cuba',
3924 'CW': 'Curaçao',
3925 'CY': 'Cyprus',
3926 'CZ': 'Czech Republic',
3927 'DK': 'Denmark',
3928 'DJ': 'Djibouti',
3929 'DM': 'Dominica',
3930 'DO': 'Dominican Republic',
3931 'EC': 'Ecuador',
3932 'EG': 'Egypt',
3933 'SV': 'El Salvador',
3934 'GQ': 'Equatorial Guinea',
3935 'ER': 'Eritrea',
3936 'EE': 'Estonia',
3937 'ET': 'Ethiopia',
3938 'FK': 'Falkland Islands (Malvinas)',
3939 'FO': 'Faroe Islands',
3940 'FJ': 'Fiji',
3941 'FI': 'Finland',
3942 'FR': 'France',
3943 'GF': 'French Guiana',
3944 'PF': 'French Polynesia',
3945 'TF': 'French Southern Territories',
3946 'GA': 'Gabon',
3947 'GM': 'Gambia',
3948 'GE': 'Georgia',
3949 'DE': 'Germany',
3950 'GH': 'Ghana',
3951 'GI': 'Gibraltar',
3952 'GR': 'Greece',
3953 'GL': 'Greenland',
3954 'GD': 'Grenada',
3955 'GP': 'Guadeloupe',
3956 'GU': 'Guam',
3957 'GT': 'Guatemala',
3958 'GG': 'Guernsey',
3959 'GN': 'Guinea',
3960 'GW': 'Guinea-Bissau',
3961 'GY': 'Guyana',
3962 'HT': 'Haiti',
3963 'HM': 'Heard Island and McDonald Islands',
3964 'VA': 'Holy See (Vatican City State)',
3965 'HN': 'Honduras',
3966 'HK': 'Hong Kong',
3967 'HU': 'Hungary',
3968 'IS': 'Iceland',
3969 'IN': 'India',
3970 'ID': 'Indonesia',
3971 'IR': 'Iran, Islamic Republic of',
3972 'IQ': 'Iraq',
3973 'IE': 'Ireland',
3974 'IM': 'Isle of Man',
3975 'IL': 'Israel',
3976 'IT': 'Italy',
3977 'JM': 'Jamaica',
3978 'JP': 'Japan',
3979 'JE': 'Jersey',
3980 'JO': 'Jordan',
3981 'KZ': 'Kazakhstan',
3982 'KE': 'Kenya',
3983 'KI': 'Kiribati',
3984 'KP': 'Korea, Democratic People\'s Republic of',
3985 'KR': 'Korea, Republic of',
3986 'KW': 'Kuwait',
3987 'KG': 'Kyrgyzstan',
3988 'LA': 'Lao People\'s Democratic Republic',
3989 'LV': 'Latvia',
3990 'LB': 'Lebanon',
3991 'LS': 'Lesotho',
3992 'LR': 'Liberia',
3993 'LY': 'Libya',
3994 'LI': 'Liechtenstein',
3995 'LT': 'Lithuania',
3996 'LU': 'Luxembourg',
3997 'MO': 'Macao',
3998 'MK': 'Macedonia, the Former Yugoslav Republic of',
3999 'MG': 'Madagascar',
4000 'MW': 'Malawi',
4001 'MY': 'Malaysia',
4002 'MV': 'Maldives',
4003 'ML': 'Mali',
4004 'MT': 'Malta',
4005 'MH': 'Marshall Islands',
4006 'MQ': 'Martinique',
4007 'MR': 'Mauritania',
4008 'MU': 'Mauritius',
4009 'YT': 'Mayotte',
4010 'MX': 'Mexico',
4011 'FM': 'Micronesia, Federated States of',
4012 'MD': 'Moldova, Republic of',
4013 'MC': 'Monaco',
4014 'MN': 'Mongolia',
4015 'ME': 'Montenegro',
4016 'MS': 'Montserrat',
4017 'MA': 'Morocco',
4018 'MZ': 'Mozambique',
4019 'MM': 'Myanmar',
4020 'NA': 'Namibia',
4021 'NR': 'Nauru',
4022 'NP': 'Nepal',
4023 'NL': 'Netherlands',
4024 'NC': 'New Caledonia',
4025 'NZ': 'New Zealand',
4026 'NI': 'Nicaragua',
4027 'NE': 'Niger',
4028 'NG': 'Nigeria',
4029 'NU': 'Niue',
4030 'NF': 'Norfolk Island',
4031 'MP': 'Northern Mariana Islands',
4032 'NO': 'Norway',
4033 'OM': 'Oman',
4034 'PK': 'Pakistan',
4035 'PW': 'Palau',
4036 'PS': 'Palestine, State of',
4037 'PA': 'Panama',
4038 'PG': 'Papua New Guinea',
4039 'PY': 'Paraguay',
4040 'PE': 'Peru',
4041 'PH': 'Philippines',
4042 'PN': 'Pitcairn',
4043 'PL': 'Poland',
4044 'PT': 'Portugal',
4045 'PR': 'Puerto Rico',
4046 'QA': 'Qatar',
4047 'RE': 'Réunion',
4048 'RO': 'Romania',
4049 'RU': 'Russian Federation',
4050 'RW': 'Rwanda',
4051 'BL': 'Saint Barthélemy',
4052 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4053 'KN': 'Saint Kitts and Nevis',
4054 'LC': 'Saint Lucia',
4055 'MF': 'Saint Martin (French part)',
4056 'PM': 'Saint Pierre and Miquelon',
4057 'VC': 'Saint Vincent and the Grenadines',
4058 'WS': 'Samoa',
4059 'SM': 'San Marino',
4060 'ST': 'Sao Tome and Principe',
4061 'SA': 'Saudi Arabia',
4062 'SN': 'Senegal',
4063 'RS': 'Serbia',
4064 'SC': 'Seychelles',
4065 'SL': 'Sierra Leone',
4066 'SG': 'Singapore',
4067 'SX': 'Sint Maarten (Dutch part)',
4068 'SK': 'Slovakia',
4069 'SI': 'Slovenia',
4070 'SB': 'Solomon Islands',
4071 'SO': 'Somalia',
4072 'ZA': 'South Africa',
4073 'GS': 'South Georgia and the South Sandwich Islands',
4074 'SS': 'South Sudan',
4075 'ES': 'Spain',
4076 'LK': 'Sri Lanka',
4077 'SD': 'Sudan',
4078 'SR': 'Suriname',
4079 'SJ': 'Svalbard and Jan Mayen',
4080 'SZ': 'Swaziland',
4081 'SE': 'Sweden',
4082 'CH': 'Switzerland',
4083 'SY': 'Syrian Arab Republic',
4084 'TW': 'Taiwan, Province of China',
4085 'TJ': 'Tajikistan',
4086 'TZ': 'Tanzania, United Republic of',
4087 'TH': 'Thailand',
4088 'TL': 'Timor-Leste',
4089 'TG': 'Togo',
4090 'TK': 'Tokelau',
4091 'TO': 'Tonga',
4092 'TT': 'Trinidad and Tobago',
4093 'TN': 'Tunisia',
4094 'TR': 'Turkey',
4095 'TM': 'Turkmenistan',
4096 'TC': 'Turks and Caicos Islands',
4097 'TV': 'Tuvalu',
4098 'UG': 'Uganda',
4099 'UA': 'Ukraine',
4100 'AE': 'United Arab Emirates',
4101 'GB': 'United Kingdom',
4102 'US': 'United States',
4103 'UM': 'United States Minor Outlying Islands',
4104 'UY': 'Uruguay',
4105 'UZ': 'Uzbekistan',
4106 'VU': 'Vanuatu',
4107 'VE': 'Venezuela, Bolivarian Republic of',
4108 'VN': 'Viet Nam',
4109 'VG': 'Virgin Islands, British',
4110 'VI': 'Virgin Islands, U.S.',
4111 'WF': 'Wallis and Futuna',
4112 'EH': 'Western Sahara',
4113 'YE': 'Yemen',
4114 'ZM': 'Zambia',
4115 'ZW': 'Zimbabwe',
4116 }
4117
4118 @classmethod
4119 def short2full(cls, code):
4120 """Convert an ISO 3166-2 country code to the corresponding full name"""
4121 return cls._country_map.get(code.upper())
4122
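# Illustrative lookup (the code is upper-cased before the lookup):
#   >>> ISO3166Utils.short2full('de')
#   'Germany'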
4123
4124 class GeoUtils:
4125 # Major IPv4 address blocks per country
4126 _country_ip_map = {
4127 'AD': '46.172.224.0/19',
4128 'AE': '94.200.0.0/13',
4129 'AF': '149.54.0.0/17',
4130 'AG': '209.59.64.0/18',
4131 'AI': '204.14.248.0/21',
4132 'AL': '46.99.0.0/16',
4133 'AM': '46.70.0.0/15',
4134 'AO': '105.168.0.0/13',
4135 'AP': '182.50.184.0/21',
4136 'AQ': '23.154.160.0/24',
4137 'AR': '181.0.0.0/12',
4138 'AS': '202.70.112.0/20',
4139 'AT': '77.116.0.0/14',
4140 'AU': '1.128.0.0/11',
4141 'AW': '181.41.0.0/18',
4142 'AX': '185.217.4.0/22',
4143 'AZ': '5.197.0.0/16',
4144 'BA': '31.176.128.0/17',
4145 'BB': '65.48.128.0/17',
4146 'BD': '114.130.0.0/16',
4147 'BE': '57.0.0.0/8',
4148 'BF': '102.178.0.0/15',
4149 'BG': '95.42.0.0/15',
4150 'BH': '37.131.0.0/17',
4151 'BI': '154.117.192.0/18',
4152 'BJ': '137.255.0.0/16',
4153 'BL': '185.212.72.0/23',
4154 'BM': '196.12.64.0/18',
4155 'BN': '156.31.0.0/16',
4156 'BO': '161.56.0.0/16',
4157 'BQ': '161.0.80.0/20',
4158 'BR': '191.128.0.0/12',
4159 'BS': '24.51.64.0/18',
4160 'BT': '119.2.96.0/19',
4161 'BW': '168.167.0.0/16',
4162 'BY': '178.120.0.0/13',
4163 'BZ': '179.42.192.0/18',
4164 'CA': '99.224.0.0/11',
4165 'CD': '41.243.0.0/16',
4166 'CF': '197.242.176.0/21',
4167 'CG': '160.113.0.0/16',
4168 'CH': '85.0.0.0/13',
4169 'CI': '102.136.0.0/14',
4170 'CK': '202.65.32.0/19',
4171 'CL': '152.172.0.0/14',
4172 'CM': '102.244.0.0/14',
4173 'CN': '36.128.0.0/10',
4174 'CO': '181.240.0.0/12',
4175 'CR': '201.192.0.0/12',
4176 'CU': '152.206.0.0/15',
4177 'CV': '165.90.96.0/19',
4178 'CW': '190.88.128.0/17',
4179 'CY': '31.153.0.0/16',
4180 'CZ': '88.100.0.0/14',
4181 'DE': '53.0.0.0/8',
4182 'DJ': '197.241.0.0/17',
4183 'DK': '87.48.0.0/12',
4184 'DM': '192.243.48.0/20',
4185 'DO': '152.166.0.0/15',
4186 'DZ': '41.96.0.0/12',
4187 'EC': '186.68.0.0/15',
4188 'EE': '90.190.0.0/15',
4189 'EG': '156.160.0.0/11',
4190 'ER': '196.200.96.0/20',
4191 'ES': '88.0.0.0/11',
4192 'ET': '196.188.0.0/14',
4193 'EU': '2.16.0.0/13',
4194 'FI': '91.152.0.0/13',
4195 'FJ': '144.120.0.0/16',
4196 'FK': '80.73.208.0/21',
4197 'FM': '119.252.112.0/20',
4198 'FO': '88.85.32.0/19',
4199 'FR': '90.0.0.0/9',
4200 'GA': '41.158.0.0/15',
4201 'GB': '25.0.0.0/8',
4202 'GD': '74.122.88.0/21',
4203 'GE': '31.146.0.0/16',
4204 'GF': '161.22.64.0/18',
4205 'GG': '62.68.160.0/19',
4206 'GH': '154.160.0.0/12',
4207 'GI': '95.164.0.0/16',
4208 'GL': '88.83.0.0/19',
4209 'GM': '160.182.0.0/15',
4210 'GN': '197.149.192.0/18',
4211 'GP': '104.250.0.0/19',
4212 'GQ': '105.235.224.0/20',
4213 'GR': '94.64.0.0/13',
4214 'GT': '168.234.0.0/16',
4215 'GU': '168.123.0.0/16',
4216 'GW': '197.214.80.0/20',
4217 'GY': '181.41.64.0/18',
4218 'HK': '113.252.0.0/14',
4219 'HN': '181.210.0.0/16',
4220 'HR': '93.136.0.0/13',
4221 'HT': '148.102.128.0/17',
4222 'HU': '84.0.0.0/14',
4223 'ID': '39.192.0.0/10',
4224 'IE': '87.32.0.0/12',
4225 'IL': '79.176.0.0/13',
4226 'IM': '5.62.80.0/20',
4227 'IN': '117.192.0.0/10',
4228 'IO': '203.83.48.0/21',
4229 'IQ': '37.236.0.0/14',
4230 'IR': '2.176.0.0/12',
4231 'IS': '82.221.0.0/16',
4232 'IT': '79.0.0.0/10',
4233 'JE': '87.244.64.0/18',
4234 'JM': '72.27.0.0/17',
4235 'JO': '176.29.0.0/16',
4236 'JP': '133.0.0.0/8',
4237 'KE': '105.48.0.0/12',
4238 'KG': '158.181.128.0/17',
4239 'KH': '36.37.128.0/17',
4240 'KI': '103.25.140.0/22',
4241 'KM': '197.255.224.0/20',
4242 'KN': '198.167.192.0/19',
4243 'KP': '175.45.176.0/22',
4244 'KR': '175.192.0.0/10',
4245 'KW': '37.36.0.0/14',
4246 'KY': '64.96.0.0/15',
4247 'KZ': '2.72.0.0/13',
4248 'LA': '115.84.64.0/18',
4249 'LB': '178.135.0.0/16',
4250 'LC': '24.92.144.0/20',
4251 'LI': '82.117.0.0/19',
4252 'LK': '112.134.0.0/15',
4253 'LR': '102.183.0.0/16',
4254 'LS': '129.232.0.0/17',
4255 'LT': '78.56.0.0/13',
4256 'LU': '188.42.0.0/16',
4257 'LV': '46.109.0.0/16',
4258 'LY': '41.252.0.0/14',
4259 'MA': '105.128.0.0/11',
4260 'MC': '88.209.64.0/18',
4261 'MD': '37.246.0.0/16',
4262 'ME': '178.175.0.0/17',
4263 'MF': '74.112.232.0/21',
4264 'MG': '154.126.0.0/17',
4265 'MH': '117.103.88.0/21',
4266 'MK': '77.28.0.0/15',
4267 'ML': '154.118.128.0/18',
4268 'MM': '37.111.0.0/17',
4269 'MN': '49.0.128.0/17',
4270 'MO': '60.246.0.0/16',
4271 'MP': '202.88.64.0/20',
4272 'MQ': '109.203.224.0/19',
4273 'MR': '41.188.64.0/18',
4274 'MS': '208.90.112.0/22',
4275 'MT': '46.11.0.0/16',
4276 'MU': '105.16.0.0/12',
4277 'MV': '27.114.128.0/18',
4278 'MW': '102.70.0.0/15',
4279 'MX': '187.192.0.0/11',
4280 'MY': '175.136.0.0/13',
4281 'MZ': '197.218.0.0/15',
4282 'NA': '41.182.0.0/16',
4283 'NC': '101.101.0.0/18',
4284 'NE': '197.214.0.0/18',
4285 'NF': '203.17.240.0/22',
4286 'NG': '105.112.0.0/12',
4287 'NI': '186.76.0.0/15',
4288 'NL': '145.96.0.0/11',
4289 'NO': '84.208.0.0/13',
4290 'NP': '36.252.0.0/15',
4291 'NR': '203.98.224.0/19',
4292 'NU': '49.156.48.0/22',
4293 'NZ': '49.224.0.0/14',
4294 'OM': '5.36.0.0/15',
4295 'PA': '186.72.0.0/15',
4296 'PE': '186.160.0.0/14',
4297 'PF': '123.50.64.0/18',
4298 'PG': '124.240.192.0/19',
4299 'PH': '49.144.0.0/13',
4300 'PK': '39.32.0.0/11',
4301 'PL': '83.0.0.0/11',
4302 'PM': '70.36.0.0/20',
4303 'PR': '66.50.0.0/16',
4304 'PS': '188.161.0.0/16',
4305 'PT': '85.240.0.0/13',
4306 'PW': '202.124.224.0/20',
4307 'PY': '181.120.0.0/14',
4308 'QA': '37.210.0.0/15',
4309 'RE': '102.35.0.0/16',
4310 'RO': '79.112.0.0/13',
4311 'RS': '93.86.0.0/15',
4312 'RU': '5.136.0.0/13',
4313 'RW': '41.186.0.0/16',
4314 'SA': '188.48.0.0/13',
4315 'SB': '202.1.160.0/19',
4316 'SC': '154.192.0.0/11',
4317 'SD': '102.120.0.0/13',
4318 'SE': '78.64.0.0/12',
4319 'SG': '8.128.0.0/10',
4320 'SI': '188.196.0.0/14',
4321 'SK': '78.98.0.0/15',
4322 'SL': '102.143.0.0/17',
4323 'SM': '89.186.32.0/19',
4324 'SN': '41.82.0.0/15',
4325 'SO': '154.115.192.0/18',
4326 'SR': '186.179.128.0/17',
4327 'SS': '105.235.208.0/21',
4328 'ST': '197.159.160.0/19',
4329 'SV': '168.243.0.0/16',
4330 'SX': '190.102.0.0/20',
4331 'SY': '5.0.0.0/16',
4332 'SZ': '41.84.224.0/19',
4333 'TC': '65.255.48.0/20',
4334 'TD': '154.68.128.0/19',
4335 'TG': '196.168.0.0/14',
4336 'TH': '171.96.0.0/13',
4337 'TJ': '85.9.128.0/18',
4338 'TK': '27.96.24.0/21',
4339 'TL': '180.189.160.0/20',
4340 'TM': '95.85.96.0/19',
4341 'TN': '197.0.0.0/11',
4342 'TO': '175.176.144.0/21',
4343 'TR': '78.160.0.0/11',
4344 'TT': '186.44.0.0/15',
4345 'TV': '202.2.96.0/19',
4346 'TW': '120.96.0.0/11',
4347 'TZ': '156.156.0.0/14',
4348 'UA': '37.52.0.0/14',
4349 'UG': '102.80.0.0/13',
4350 'US': '6.0.0.0/8',
4351 'UY': '167.56.0.0/13',
4352 'UZ': '84.54.64.0/18',
4353 'VA': '212.77.0.0/19',
4354 'VC': '207.191.240.0/21',
4355 'VE': '186.88.0.0/13',
4356 'VG': '66.81.192.0/20',
4357 'VI': '146.226.0.0/16',
4358 'VN': '14.160.0.0/11',
4359 'VU': '202.80.32.0/20',
4360 'WF': '117.20.32.0/21',
4361 'WS': '202.4.32.0/19',
4362 'YE': '134.35.0.0/16',
4363 'YT': '41.242.116.0/22',
4364 'ZA': '41.0.0.0/11',
4365 'ZM': '102.144.0.0/13',
4366 'ZW': '102.177.192.0/18',
4367 }
4368
4369 @classmethod
4370 def random_ipv4(cls, code_or_block):
4371 if len(code_or_block) == 2:
4372 block = cls._country_ip_map.get(code_or_block.upper())
4373 if not block:
4374 return None
4375 else:
4376 block = code_or_block
4377 addr, preflen = block.split('/')
4378 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4379 addr_max = addr_min | (0xffffffff >> int(preflen))
4380 return compat_str(socket.inet_ntoa(
4381 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4382
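# Worked example of the CIDR arithmetic above, for the 'US' block 6.0.0.0/8:
# addr_min = 0x06000000 and the host mask is 0xffffffff >> 8 = 0x00ffffff,
# so addr_max = 0x06ffffff and the result is a random address in
# 6.0.0.0-6.255.255.255. An explicit block such as '139.59.0.0/16' may be
# passed instead of a two-letter country code.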
4383
4384 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4385 def __init__(self, proxies=None):
4386 # Set default handlers
4387 for type in ('http', 'https'):
4388 setattr(self, '%s_open' % type,
4389 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4390 meth(r, proxy, type))
4391 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4392
4393 def proxy_open(self, req, proxy, type):
4394 req_proxy = req.headers.get('Ytdl-request-proxy')
4395 if req_proxy is not None:
4396 proxy = req_proxy
4397 del req.headers['Ytdl-request-proxy']
4398
4399 if proxy == '__noproxy__':
4400 return None # No Proxy
4401 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4402 req.add_header('Ytdl-socks-proxy', proxy)
4403 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
4404 return None
4405 return compat_urllib_request.ProxyHandler.proxy_open(
4406 self, req, proxy, type)
4407
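# Illustrative sketch (hypothetical proxy URLs): install the handler globally,
# then override the proxy for a single request via the private header.
#   >>> opener = compat_urllib_request.build_opener(
#   ...     PerRequestProxyHandler({'http': 'http://proxy.example:3128'}))
#   >>> req = compat_urllib_request.Request('http://example.com')
#   >>> req.add_header('Ytdl-request-proxy', '__noproxy__')  # bypass the proxy for this request only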
4408
4409 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4410 # released into Public Domain
4411 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4412
4413 def long_to_bytes(n, blocksize=0):
4414 """long_to_bytes(n:long, blocksize:int) : string
4415 Convert a long integer to a byte string.
4416
4417 If optional blocksize is given and greater than zero, pad the front of the
4418 byte string with binary zeros so that the length is a multiple of
4419 blocksize.
4420 """
4421 # after much testing, this algorithm was deemed to be the fastest
4422 s = b''
4423 n = int(n)
4424 while n > 0:
4425 s = compat_struct_pack('>I', n & 0xffffffff) + s
4426 n = n >> 32
4427 # strip off leading zeros
4428 for i in range(len(s)):
4429 if s[i] != b'\000'[0]:
4430 break
4431 else:
4432 # only happens when n == 0
4433 s = b'\000'
4434 i = 0
4435 s = s[i:]
4436 # add back some pad bytes. this could be done more efficiently w.r.t. the
4437 # de-padding being done above, but sigh...
4438 if blocksize > 0 and len(s) % blocksize:
4439 s = (blocksize - len(s) % blocksize) * b'\000' + s
4440 return s
4441
4442
4443 def bytes_to_long(s):
4444 """bytes_to_long(string) : long
4445 Convert a byte string to a long integer.
4446
4447 This is (essentially) the inverse of long_to_bytes().
4448 """
4449 acc = 0
4450 length = len(s)
4451 if length % 4:
4452 extra = (4 - length % 4)
4453 s = b'\000' * extra + s
4454 length = length + extra
4455 for i in range(0, length, 4):
4456 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4457 return acc
4458
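# Illustrative round trip:
#   >>> bytes_to_long(b'\x01\x00')
#   256
#   >>> long_to_bytes(256)
#   b'\x01\x00'
#   >>> long_to_bytes(256, blocksize=4)
#   b'\x00\x00\x01\x00'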
4459
4460 def ohdave_rsa_encrypt(data, exponent, modulus):
4461 '''
4462 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4463
4464 Input:
4465 data: data to encrypt, bytes-like object
4466 exponent, modulus: parameter e and N of RSA algorithm, both integer
4467 Output: hex string of encrypted data
4468
4469 Limitation: supports one block encryption only
4470 '''
4471
4472 payload = int(binascii.hexlify(data[::-1]), 16)
4473 encrypted = pow(payload, exponent, modulus)
4474 return '%x' % encrypted
4475
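# Illustrative sketch with toy RSA parameters (e=17, N=3233=61*53; real sites
# use a full-size modulus). The data is reversed, hexlified and interpreted as
# an integer before the modular exponentiation:
#   >>> ohdave_rsa_encrypt(b'\x02', 17, 3233)  # pow(2, 17, 3233) == 1752
#   '6d8'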
4476
4477 def pkcs1pad(data, length):
4478 """
4479 Padding input data with PKCS#1 scheme
4480
4481 @param {int[]} data input data
4482 @param {int} length target length
4483 @returns {int[]} padded data
4484 """
4485 if len(data) > length - 11:
4486 raise ValueError('Input data too long for PKCS#1 padding')
4487
4488 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)] # PKCS#1 v1.5 requires nonzero padding bytes
4489 return [0, 2] + pseudo_random + [0] + data
4490
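# Illustrative sketch: padding a single byte to a 16-byte block. Only the
# layout is deterministic; the filler bytes in the middle are random.
#   >>> padded = pkcs1pad([0x41], 16)
#   >>> padded[:2], padded[-2:], len(padded)
#   ([0, 2], [0, 65], 16)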
4491
4492 def encode_base_n(num, n, table=None):
4493 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4494 if not table:
4495 table = FULL_TABLE[:n]
4496
4497 if n > len(table):
4498 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4499
4500 if num == 0:
4501 return table[0]
4502
4503 ret = ''
4504 while num:
4505 ret = table[num % n] + ret
4506 num = num // n
4507 return ret
4508
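# Illustrative conversions:
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(0, 2)
#   '0'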
4509
4510 def decode_packed_codes(code):
4511 mobj = re.search(PACKED_CODES_RE, code)
4512 obfuscated_code, base, count, symbols = mobj.groups()
4513 base = int(base)
4514 count = int(count)
4515 symbols = symbols.split('|')
4516 symbol_table = {}
4517
4518 while count:
4519 count -= 1
4520 base_n_count = encode_base_n(count, base)
4521 symbol_table[base_n_count] = symbols[count] or base_n_count
4522
4523 return re.sub(
4524 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4525 obfuscated_code)
4526
4527
4528 def caesar(s, alphabet, shift):
4529 if shift == 0:
4530 return s
4531 l = len(alphabet)
4532 return ''.join(
4533 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4534 for c in s)
4535
4536
4537 def rot47(s):
4538 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4539
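# Illustrative sketches: caesar() shifts only characters present in the given
# alphabet, and rot47 is its own inverse (shift 47 over a 94-character alphabet).
#   >>> caesar('ab-c', 'abcdefghijklmnopqrstuvwxyz', 1)
#   'bc-d'
#   >>> rot47(rot47('secret')) == 'secret'
#   True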
4540
4541 def parse_m3u8_attributes(attrib):
4542 info = {}
4543 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4544 if val.startswith('"'):
4545 val = val[1:-1]
4546 info[key] = val
4547 return info
4548
4549
4550 def urshift(val, n):
4551 return val >> n if val >= 0 else (val + 0x100000000) >> n
4552
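# urshift() emulates JavaScript's unsigned right shift (>>>) for 32-bit values:
#   >>> urshift(-1, 1)  # (-1 + 0x100000000) >> 1 == 0xffffffff >> 1
#   2147483647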
4553
4554 # Based on png2str() written by @gdkchan and improved by @yokrysty
4555 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4556 def decode_png(png_data):
4557 # Reference: https://www.w3.org/TR/PNG/
4558 header = png_data[8:]
4559
4560 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4561 raise OSError('Not a valid PNG file.')
4562
4563 int_map = {1: '>B', 2: '>H', 4: '>I'}
4564 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4565
4566 chunks = []
4567
4568 while header:
4569 length = unpack_integer(header[:4])
4570 header = header[4:]
4571
4572 chunk_type = header[:4]
4573 header = header[4:]
4574
4575 chunk_data = header[:length]
4576 header = header[length:]
4577
4578 header = header[4:] # Skip CRC
4579
4580 chunks.append({
4581 'type': chunk_type,
4582 'length': length,
4583 'data': chunk_data
4584 })
4585
4586 ihdr = chunks[0]['data']
4587
4588 width = unpack_integer(ihdr[:4])
4589 height = unpack_integer(ihdr[4:8])
4590
4591 idat = b''
4592
4593 for chunk in chunks:
4594 if chunk['type'] == b'IDAT':
4595 idat += chunk['data']
4596
4597 if not idat:
4598 raise OSError('Unable to read PNG data.')
4599
4600 decompressed_data = bytearray(zlib.decompress(idat))
4601
4602 stride = width * 3
4603 pixels = []
4604
4605 def _get_pixel(idx):
4606 x = idx % stride
4607 y = idx // stride
4608 return pixels[y][x]
4609
4610 for y in range(height):
4611 basePos = y * (1 + stride)
4612 filter_type = decompressed_data[basePos]
4613
4614 current_row = []
4615
4616 pixels.append(current_row)
4617
4618 for x in range(stride):
4619 color = decompressed_data[1 + basePos + x]
4620 basex = y * stride + x
4621 left = 0
4622 up = 0
4623
4624 if x > 2:
4625 left = _get_pixel(basex - 3)
4626 if y > 0:
4627 up = _get_pixel(basex - stride)
4628
4629 if filter_type == 1: # Sub
4630 color = (color + left) & 0xff
4631 elif filter_type == 2: # Up
4632 color = (color + up) & 0xff
4633 elif filter_type == 3: # Average
4634 color = (color + ((left + up) >> 1)) & 0xff
4635 elif filter_type == 4: # Paeth
4636 a = left
4637 b = up
4638 c = 0
4639
4640 if x > 2 and y > 0:
4641 c = _get_pixel(basex - stride - 3)
4642
4643 p = a + b - c
4644
4645 pa = abs(p - a)
4646 pb = abs(p - b)
4647 pc = abs(p - c)
4648
4649 if pa <= pb and pa <= pc:
4650 color = (color + a) & 0xff
4651 elif pb <= pc:
4652 color = (color + b) & 0xff
4653 else:
4654 color = (color + c) & 0xff
4655
4656 current_row.append(color)
4657
4658 return width, height, pixels
4659
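# Illustrative usage sketch (assumes `png_bytes` holds a complete 8-bit RGB
# PNG, the only layout this decoder handles; each row is a flat byte list of
# length width * 3):
#   >>> width, height, pixels = decode_png(png_bytes)
#   >>> r, g, b = pixels[0][0:3]  # channel bytes of the top-left pixel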
4660
4661 def write_xattr(path, key, value):
4662 # This mess below finds the best xattr tool for the job
4663 try:
4664 # try the pyxattr module...
4665 import xattr
4666
4667 if hasattr(xattr, 'set'): # pyxattr
4668 # Unicode arguments are not supported in python-pyxattr until
4669 # version 0.5.0
4670 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4671 pyxattr_required_version = '0.5.0'
4672 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
4673 # TODO: fallback to CLI tools
4674 raise XAttrUnavailableError(
4675 'python-pyxattr is detected but is too old. '
4676 'yt-dlp requires %s or above while your version is %s. '
4677 'Falling back to other xattr implementations' % (
4678 pyxattr_required_version, xattr.__version__))
4679
4680 setxattr = xattr.set
4681 else: # xattr
4682 setxattr = xattr.setxattr
4683
4684 try:
4685 setxattr(path, key, value)
4686 except OSError as e:
4687 raise XAttrMetadataError(e.errno, e.strerror)
4688
4689 except ImportError:
4690 if compat_os_name == 'nt':
4691 # Write xattrs to NTFS Alternate Data Streams:
4692 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4693 assert ':' not in key
4694 assert os.path.exists(path)
4695
4696 ads_fn = path + ':' + key
4697 try:
4698 with open(ads_fn, 'wb') as f:
4699 f.write(value)
4700 except OSError as e:
4701 raise XAttrMetadataError(e.errno, e.strerror)
4702 else:
4703 user_has_setfattr = check_executable('setfattr', ['--version'])
4704 user_has_xattr = check_executable('xattr', ['-h'])
4705
4706 if user_has_setfattr or user_has_xattr:
4707
4708 value = value.decode('utf-8')
4709 if user_has_setfattr:
4710 executable = 'setfattr'
4711 opts = ['-n', key, '-v', value]
4712 elif user_has_xattr:
4713 executable = 'xattr'
4714 opts = ['-w', key, value]
4715
4716 cmd = ([encodeFilename(executable, True)]
4717 + [encodeArgument(o) for o in opts]
4718 + [encodeFilename(path, True)])
4719
4720 try:
4721 p = Popen(
4722 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4723 except OSError as e:
4724 raise XAttrMetadataError(e.errno, e.strerror)
4725 stdout, stderr = p.communicate_or_kill()
4726 stderr = stderr.decode('utf-8', 'replace')
4727 if p.returncode != 0:
4728 raise XAttrMetadataError(p.returncode, stderr)
4729
4730 else:
4731 # On Unix, but couldn't find pyxattr, setfattr, or xattr.
4732 if sys.platform.startswith('linux'):
4733 raise XAttrUnavailableError(
4734 "Couldn't find a tool to set the xattrs. "
4735 "Install either the python 'pyxattr' or 'xattr' "
4736 "modules, or the GNU 'attr' package "
4737 "(which contains the 'setfattr' tool).")
4738 else:
4739 raise XAttrUnavailableError(
4740 "Couldn't find a tool to set the xattrs. "
4741 "Install either the python 'xattr' module, "
4742 "or the 'xattr' binary.")
4743
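# Illustrative call (hypothetical path; on Linux, user xattrs live in the
# 'user.' namespace, while on Windows the value lands in an NTFS alternate
# data stream named after the key):
#   >>> write_xattr('video.mp4', 'user.xdg.referrer.url', b'https://example.com/')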
4744
4745 def random_birthday(year_field, month_field, day_field):
4746 start_date = datetime.date(1950, 1, 1)
4747 end_date = datetime.date(1995, 12, 31)
4748 offset = random.randint(0, (end_date - start_date).days)
4749 random_date = start_date + datetime.timedelta(offset)
4750 return {
4751 year_field: str(random_date.year),
4752 month_field: str(random_date.month),
4753 day_field: str(random_date.day),
4754 }
4755
4756
4757 # Templates for internet shortcut files, which are plain text files.
4758 DOT_URL_LINK_TEMPLATE = '''\
4759 [InternetShortcut]
4760 URL=%(url)s
4761 '''
4762
4763 DOT_WEBLOC_LINK_TEMPLATE = '''\
4764 <?xml version="1.0" encoding="UTF-8"?>
4765 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4766 <plist version="1.0">
4767 <dict>
4768 \t<key>URL</key>
4769 \t<string>%(url)s</string>
4770 </dict>
4771 </plist>
4772 '''
4773
4774 DOT_DESKTOP_LINK_TEMPLATE = '''\
4775 [Desktop Entry]
4776 Encoding=UTF-8
4777 Name=%(filename)s
4778 Type=Link
4779 URL=%(url)s
4780 Icon=text-html
4781 '''
4782
4783 LINK_TEMPLATES = {
4784 'url': DOT_URL_LINK_TEMPLATE,
4785 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
4786 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
4787 }
4788
4789
4790 def iri_to_uri(iri):
4791 """
4792 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4793
4794 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
4795 """
4796
4797 iri_parts = compat_urllib_parse_urlparse(iri)
4798
4799 if '[' in iri_parts.netloc:
4800 raise ValueError('IPv6 URIs are not yet supported.')
4801 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4802
4803 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4804
4805 net_location = ''
4806 if iri_parts.username:
4807 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
4808 if iri_parts.password is not None:
4809 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
4810 net_location += '@'
4811
4812 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
4813 # The 'idna' encoding produces ASCII text.
4814 if iri_parts.port is not None and iri_parts.port != 80:
4815 net_location += ':' + str(iri_parts.port)
4816
4817 return urllib.parse.urlunparse(
4818 (iri_parts.scheme,
4819 net_location,
4820
4821 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
4822
4823 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4824 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
4825
4826 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4827 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
4828
4829 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
4830
4831 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
4832
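# Illustrative conversion (hypothetical IRI): the Unicode hostname is
# punycoded while the path is percent-encoded as UTF-8.
#   >>> iri_to_uri('https://müller.example/päth')
#   'https://xn--mller-kva.example/p%C3%A4th'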
4833
4834 def to_high_limit_path(path):
4835 if sys.platform in ['win32', 'cygwin']:
4836 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
4837 return '\\\\?\\' + os.path.abspath(path)
4838
4839 return path
4840
4841
4842 def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
4843 val = traverse_obj(obj, *variadic(field))
4844 if val in ignore:
4845 return default
4846 return template % (func(val) if func else val)
4847
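# Illustrative sketches:
#   >>> format_field({'height': 1080}, 'height', '%sp')
#   '1080p'
#   >>> format_field({'height': None}, 'height', '%sp', default='unknown')
#   'unknown'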
4848
4849 def clean_podcast_url(url):
4850 return re.sub(r'''(?x)
4851 (?:
4852 (?:
4853 chtbl\.com/track|
4854 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4855 play\.podtrac\.com
4856 )/[^/]+|
4857 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4858 flex\.acast\.com|
4859 pd(?:
4860 cn\.co| # https://podcorn.com/analytics-prefix/
4861 st\.fm # https://podsights.com/docs/
4862 )/e
4863 )/''', '', url)
4864
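# Illustrative sketch (hypothetical URL): the measurement prefix is stripped,
# leaving the direct media URL.
#   >>> clean_podcast_url('https://chtbl.com/track/12345/traffic.example.com/ep1.mp3')
#   'https://traffic.example.com/ep1.mp3'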
4865
4866 _HEX_TABLE = '0123456789abcdef'
4867
4868
4869 def random_uuidv4():
4870 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
4871
4872
4873 def make_dir(path, to_screen=None):
4874 try:
4875 dn = os.path.dirname(path)
4876 if dn and not os.path.exists(dn):
4877 os.makedirs(dn)
4878 return True
4879 except OSError as err:
4880 if callable(to_screen):
4881 to_screen('unable to create directory ' + error_to_compat_str(err))
4882 return False
4883
4884
4885 def get_executable_path():
4886 from zipimport import zipimporter
4887 if hasattr(sys, 'frozen'): # Running from PyInstaller
4888 path = os.path.dirname(sys.executable)
4889 elif isinstance(__loader__, zipimporter): # Running from ZIP
4890 path = os.path.join(os.path.dirname(__file__), '../..')
4891 else:
4892 path = os.path.join(os.path.dirname(__file__), '..')
4893 return os.path.abspath(path)
4894
4895
4896 def load_plugins(name, suffix, namespace):
4897 classes = {}
4898 with contextlib.suppress(FileNotFoundError):
4899 plugins_spec = importlib.util.spec_from_file_location(
4900 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
4901 plugins = importlib.util.module_from_spec(plugins_spec)
4902 sys.modules[plugins_spec.name] = plugins
4903 plugins_spec.loader.exec_module(plugins)
4904 for name in dir(plugins):
4905 if name in namespace:
4906 continue
4907 if not name.endswith(suffix):
4908 continue
4909 klass = getattr(plugins, name)
4910 classes[name] = namespace[name] = klass
4911 return classes
4912
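# Illustrative sketch mirroring how the extractor loader consumes this helper
# (plugin extractor classes follow the '*IE' naming convention):
#   >>> _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())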
4913
4914 def traverse_obj(
4915 obj, *path_list, default=None, expected_type=None, get_all=True,
4916 casesense=True, is_user_input=False, traverse_string=False):
4917 ''' Traverse nested list/dict/tuple
4918 @param path_list A list of paths which are checked one by one.
4919 Each path is a list of keys where each key is a:
4920 - None: Do nothing
4921 - string: A dictionary key
4922 - int: An index into a list
4923 - tuple: A list of keys all of which will be traversed
4924 - Ellipsis: Fetch all values in the object
4925 - Function: Takes the key and value as arguments
4926 and returns whether the key matches or not
4927 @param default Default value to return
4928 @param expected_type Only accept final value of this type (Can also be any callable)
4929 @param get_all Return all the values obtained from a path or only the first one
4930 @param casesense Whether to consider dictionary keys as case sensitive
4931 @param is_user_input Whether the keys are generated from user input. If True,
4932 strings are converted to int/slice if necessary
4933 @param traverse_string Whether to traverse inside strings. If True, any
4934 non-compatible object will also be converted into a string
4935 # TODO: Write tests
4936 '''
4937 if not casesense:
4938 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
4939 path_list = (map(_lower, variadic(path)) for path in path_list)
4940
4941 def _traverse_obj(obj, path, _current_depth=0):
4942 nonlocal depth
4943 path = tuple(variadic(path))
4944 for i, key in enumerate(path):
4945 if None in (key, obj):
4946 return obj
4947 if isinstance(key, (list, tuple)):
4948 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
4949 key = ...
4950 if key is ...:
4951 obj = (obj.values() if isinstance(obj, dict)
4952 else obj if isinstance(obj, (list, tuple, LazyList))
4953 else str(obj) if traverse_string else [])
4954 _current_depth += 1
4955 depth = max(depth, _current_depth)
4956 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
4957 elif callable(key):
4958 if isinstance(obj, (list, tuple, LazyList)):
4959 obj = enumerate(obj)
4960 elif isinstance(obj, dict):
4961 obj = obj.items()
4962 else:
4963 if not traverse_string:
4964 return None
4965 obj = str(obj)
4966 _current_depth += 1
4967 depth = max(depth, _current_depth)
4968 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
4969 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
4970 obj = (obj.get(key) if casesense or (key in obj)
4971 else next((v for k, v in obj.items() if _lower(k) == key), None))
4972 else:
4973 if is_user_input:
4974 key = (int_or_none(key) if ':' not in key
4975 else slice(*map(int_or_none, key.split(':'))))
4976 if key == slice(None):
4977 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
4978 if not isinstance(key, (int, slice)):
4979 return None
4980 if not isinstance(obj, (list, tuple, LazyList)):
4981 if not traverse_string:
4982 return None
4983 obj = str(obj)
4984 try:
4985 obj = obj[key]
4986 except IndexError:
4987 return None
4988 return obj
4989
4990 if isinstance(expected_type, type):
4991 type_test = lambda val: val if isinstance(val, expected_type) else None
4992 elif expected_type is not None:
4993 type_test = expected_type
4994 else:
4995 type_test = lambda val: val
4996
4997 for path in path_list:
4998 depth = 0
4999 val = _traverse_obj(obj, path)
5000 if val is not None:
5001 if depth:
5002 for _ in range(depth - 1):
5003 val = itertools.chain.from_iterable(v for v in val if v is not None)
5004 val = [v for v in map(type_test, val) if v is not None]
5005 if val:
5006 return val if get_all else val[0]
5007 else:
5008 val = type_test(val)
5009 if val is not None:
5010 return val
5011 return default
5012
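# Illustrative sketches of the path mini-language (see the docstring above):
#   >>> d = {'formats': [{'url': 'https://a.example'}, {'url': 'https://b.example'}, {}]}
#   >>> traverse_obj(d, ('formats', 0, 'url'))
#   'https://a.example'
#   >>> traverse_obj(d, ('formats', ..., 'url'))
#   ['https://a.example', 'https://b.example']
#   >>> traverse_obj(d, ('formats', ..., 'url'), get_all=False)
#   'https://a.example'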
5013
5014 def traverse_dict(dictn, keys, casesense=True):
5015 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5016 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5017 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5018
5019
5020 def get_first(obj, keys, **kwargs):
5021 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5022
5023
5024 def variadic(x, allowed_types=(str, bytes, dict)):
5025 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5026
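# Illustrative behaviour: strings, bytes and dicts are wrapped, other
# iterables are passed through unchanged.
#   >>> variadic('spam')
#   ('spam',)
#   >>> variadic(['spam', 'eggs'])
#   ['spam', 'eggs']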
5027
5028 def decode_base(value, digits):
5029 # Convert the given base-x string to an integer
5030 table = {char: index for index, char in enumerate(digits)}
5031 result = 0
5032 base = len(digits)
5033 for char in value: # 'char', not 'chr', to avoid shadowing the builtin
5034 result *= base
5035 result += table[char]
5036 return result
5037
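# Illustrative inverse of encode_base_n above:
#   >>> decode_base('ff', '0123456789abcdef')
#   255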
5038
5039 def time_seconds(**kwargs):
5040 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5041 return t.timestamp()
5042
5043
5044 # create a JSON Web Signature (jws) with HS256 algorithm
5045 # the resulting format is in JWS Compact Serialization
5046 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5047 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5048 def jwt_encode_hs256(payload_data, key, headers={}):
5049 header_data = {
5050 'alg': 'HS256',
5051 'typ': 'JWT',
5052 }
5053 if headers:
5054 header_data.update(headers)
5055 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
5056 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
5057 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
5058 signature_b64 = base64.b64encode(h.digest())
5059 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5060 return token
5061
5062
5063 # Can be extended in the future to verify the signature, parse the header and return the algorithm used if it's not HS256
5064 def jwt_decode_hs256(jwt):
5065 header_b64, payload_b64, signature_b64 = jwt.split('.')
5066 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5067 return payload_data
5068
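# Illustrative round trip (hypothetical payload and key). Note that
# jwt_encode_hs256 returns bytes while jwt_decode_hs256 expects a str:
#   >>> token = jwt_encode_hs256({'user': 'test'}, 'secret')
#   >>> jwt_decode_hs256(token.decode('utf-8'))
#   {'user': 'test'}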
5069
5070 def supports_terminal_sequences(stream):
5071 if compat_os_name == 'nt':
5072 from .compat import WINDOWS_VT_MODE # Must be imported locally
5073 if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
5074 return False
5075 elif not os.getenv('TERM'):
5076 return False
5077 try:
5078 return stream.isatty()
5079 except BaseException:
5080 return False
5081
5082
5083 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5084
5085
5086 def remove_terminal_sequences(string):
5087 return _terminal_sequences_re.sub('', string)
5088
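# Illustrative sketch: stripping ANSI SGR color codes.
#   >>> remove_terminal_sequences('\033[0;31mred\033[0m')
#   'red'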
5089
5090 def number_of_digits(number):
5091 return len('%d' % number)
5092
5093
5094 def join_nonempty(*values, delim='-', from_dict=None):
5095 if from_dict is not None:
5096 values = map(from_dict.get, values)
5097 return delim.join(map(str, filter(None, values)))
5098
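# Illustrative sketches (note that filter(None) also drops 0 and ''):
#   >>> join_nonempty('mp4', None, 'dash')
#   'mp4-dash'
#   >>> join_nonempty('width', 'height', from_dict={'width': 1920, 'height': 1080}, delim='x')
#   '1920x1080'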
5099
5100 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5101 """
5102 Find the largest format dimensions in terms of video width and, for each thumbnail:
5103 * Modify the URL: Match the width with the provided regex and replace it with the largest format width
5104 * Update dimensions
5105
5106 This function is useful with video services that scale the provided thumbnails on demand
5107 """
5108 _keys = ('width', 'height')
5109 max_dimensions = max(
5110 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5111 default=(0, 0))
5112 if not max_dimensions[0]:
5113 return thumbnails
5114 return [
5115 merge_dicts(
5116 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5117 dict(zip(_keys, max_dimensions)), thumbnail)
5118 for thumbnail in thumbnails
5119 ]
5120
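# Illustrative sketch (hypothetical thumbnail service that encodes the width
# in the URL path):
#   >>> scale_thumbnails_to_max_format_width(
#   ...     [{'width': 1920, 'height': 1080}],
#   ...     [{'url': 'https://img.example/320/thumb.jpg'}], r'(?<=/)\d+(?=/)')
#   [{'url': 'https://img.example/1920/thumb.jpg', 'width': 1920, 'height': 1080}]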
5121
5122 def parse_http_range(range):
5123 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5124 if not range:
5125 return None, None, None
5126 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5127 if not crg:
5128 return None, None, None
5129 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5130
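# Illustrative parses of the two header forms:
#   >>> parse_http_range('bytes=0-499')
#   (0, 499, None)
#   >>> parse_http_range('bytes 500-999/1234')
#   (500, 999, 1234)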
5131
5132 class Config:
5133 own_args = None
5134 filename = None
5135 __initialized = False
5136
5137 def __init__(self, parser, label=None):
5138 self._parser, self.label = parser, label
5139 self._loaded_paths, self.configs = set(), []
5140
5141 def init(self, args=None, filename=None):
5142 assert not self.__initialized
5143 directory = ''
5144 if filename:
5145 location = os.path.realpath(filename)
5146 directory = os.path.dirname(location)
5147 if location in self._loaded_paths:
5148 return False
5149 self._loaded_paths.add(location)
5150
5151 self.__initialized = True
5152 self.own_args, self.filename = args, filename
5153 for location in self._parser.parse_args(args)[0].config_locations or []:
5154 location = os.path.join(directory, expand_path(location))
5155 if os.path.isdir(location):
5156 location = os.path.join(location, 'yt-dlp.conf')
5157 if not os.path.exists(location):
5158 self._parser.error(f'config location {location} does not exist')
5159 self.append_config(self.read_file(location), location)
5160 return True
5161
5162 def __str__(self):
5163 label = join_nonempty(
5164 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5165 delim=' ')
5166 return join_nonempty(
5167 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5168 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5169 delim='\n')
5170
5171 @staticmethod
5172 def read_file(filename, default=[]):
5173 try:
5174 optionf = open(filename)
5175 except OSError:
5176 return default # silently skip if file is not present
5177 try:
5178 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5179 contents = optionf.read()
5180 res = shlex.split(contents, comments=True)
5181 finally:
5182 optionf.close()
5183 return res
5184
5185 @staticmethod
5186 def hide_login_info(opts):
5187 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5188 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5189
5190 def _scrub_eq(o):
5191 m = eqre.match(o)
5192 if m:
5193 return m.group('key') + '=PRIVATE'
5194 else:
5195 return o
5196
5197 opts = list(map(_scrub_eq, opts))
5198 for idx, opt in enumerate(opts):
5199 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5200 opts[idx + 1] = 'PRIVATE'
5201 return opts
5202
5203 def append_config(self, *args, label=None):
5204 config = type(self)(self._parser, label)
5205 config._loaded_paths = self._loaded_paths
5206 if config.init(*args):
5207 self.configs.append(config)
5208
5209 @property
5210 def all_args(self):
5211 for config in reversed(self.configs):
5212 yield from config.all_args
5213 yield from self.own_args or []
5214
5215 def parse_args(self):
5216 return self._parser.parse_args(self.all_args)
5217
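# Illustrative sketch of the credential scrubbing applied when configs are printed:
#   >>> Config.hide_login_info(['-u', 'name', '--password=secret'])
#   ['-u', 'PRIVATE', '--password=PRIVATE']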
5218
5219 class WebSocketsWrapper:
5220 """Wraps websockets module to use in non-async scopes"""
5221 pool = None
5222
5223 def __init__(self, url, headers=None, connect=True):
5224 self.loop = asyncio.events.new_event_loop()
5225 # XXX: "loop" is deprecated
5226 self.conn = websockets.connect(
5227 url, extra_headers=headers, ping_interval=None,
5228 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5229 if connect:
5230 self.__enter__()
5231 atexit.register(self.__exit__, None, None, None)
5232
5233 def __enter__(self):
5234 if not self.pool:
5235 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5236 return self
5237
5238 def send(self, *args):
5239 self.run_with_loop(self.pool.send(*args), self.loop)
5240
5241 def recv(self, *args):
5242 return self.run_with_loop(self.pool.recv(*args), self.loop)
5243
5244 def __exit__(self, type, value, traceback):
5245 try:
5246 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5247 finally:
5248 self._cancel_all_tasks(self.loop) # cancel pending tasks first; a closed loop cannot run them
5249 self.loop.close()
5250
5251 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5252 # For contributors: if any new library that uses asyncio needs to run in a non-async context, move these functions out of this class
5253 @staticmethod
5254 def run_with_loop(main, loop):
5255 if not asyncio.coroutines.iscoroutine(main):
5256 raise ValueError(f'a coroutine was expected, got {main!r}')
5257
5258 try:
5259 return loop.run_until_complete(main)
5260 finally:
5261 loop.run_until_complete(loop.shutdown_asyncgens())
5262 if hasattr(loop, 'shutdown_default_executor'):
5263 loop.run_until_complete(loop.shutdown_default_executor())
5264
5265 @staticmethod
5266 def _cancel_all_tasks(loop):
5267 to_cancel = asyncio.tasks.all_tasks(loop)
5268
5269 if not to_cancel:
5270 return
5271
5272 for task in to_cancel:
5273 task.cancel()
5274
5275 # XXX: "loop" is removed in python 3.10+
5276 loop.run_until_complete(
5277 asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True))
5278
5279 for task in to_cancel:
5280 if task.cancelled():
5281 continue
5282 if task.exception() is not None:
5283 loop.call_exception_handler({
5284 'message': 'unhandled exception during asyncio.run() shutdown',
5285 'exception': task.exception(),
5286 'task': task,
5287 })
5288
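# Illustrative usage sketch (hypothetical endpoint): the wrapper drives the
# async websockets API from synchronous code.
#   >>> ws = WebSocketsWrapper('wss://ws.example/live', headers={'Origin': 'https://example.com'})
#   >>> ws.send('{"op": "subscribe"}')
#   >>> message = ws.recv()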
5289
5290 def merge_headers(*dicts):
5291 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5292 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5293
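# Illustrative merge: keys are normalized via str.title(), so they compare
# case-insensitively, and later dicts win.
#   >>> merge_headers({'user-agent': 'A', 'accept': '*/*'}, {'User-Agent': 'B'})
#   {'User-Agent': 'B', 'Accept': '*/*'}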
5294
5295 class classproperty:
5296 def __init__(self, f):
5297 self.f = f
5298
5299 def __get__(self, _, cls):
5300 return self.f(cls)
5301
5302
5303 def Namespace(**kwargs):
5304 return collections.namedtuple('Namespace', kwargs)(**kwargs)
5305
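# Illustrative sketches of the two helpers above (hypothetical class/fields):
#   >>> class MyIE:
#   ...     @classproperty
#   ...     def ie_key(cls):
#   ...         return cls.__name__[:-2]
#   >>> MyIE.ie_key
#   'My'
#   >>> ns = Namespace(AUDIO='audio', VIDEO='video')
#   >>> ns.AUDIO
#   'audio'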
5306
5307 # Deprecated
5308 has_certifi = bool(certifi)
5309 has_websockets = bool(websockets)