yt_dlp/utils.py

   1 #!/usr/bin/env python3
   2 import asyncio
   3 import atexit
   4 import base64
   5 import binascii
   6 import calendar
   7 import codecs
   8 import collections
   9 import contextlib
  10 import ctypes
  11 import datetime
  12 import email.header
  13 import email.utils
  14 import errno
  15 import functools
  16 import gzip
  17 import hashlib
  18 import hmac
  19 import importlib.util
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import mimetypes
  26 import operator
  27 import os
  28 import platform
  29 import random
  30 import re
  31 import shlex
  32 import socket
  33 import ssl
  34 import subprocess
  35 import sys
  36 import tempfile
  37 import time
  38 import traceback
  39 import urllib.parse
  40 import xml.etree.ElementTree
  41 import zlib
  42
  43 from .compat import (
  44     compat_brotli,
  45     compat_chr,
  46     compat_cookiejar,
  47     compat_etree_fromstring,
  48     compat_expanduser,
  49     compat_html_entities,
  50     compat_html_entities_html5,
  51     compat_HTMLParseError,
  52     compat_HTMLParser,
  53     compat_http_client,
  54     compat_HTTPError,
  55     compat_os_name,
  56     compat_parse_qs,
  57     compat_shlex_quote,
  58     compat_str,
  59     compat_struct_pack,
  60     compat_struct_unpack,
  61     compat_urllib_error,
  62     compat_urllib_parse_unquote_plus,
  63     compat_urllib_parse_urlencode,
  64     compat_urllib_parse_urlparse,
  65     compat_urllib_request,
  66     compat_urlparse,
  67     compat_websockets,
  68 )
  69 from .socks import ProxyType, sockssocket
  70
try:
    import certifi

    # The certificate may not be bundled in executable, so only report
    # certifi as usable when the bundle path it points to actually exists
    has_certifi = os.path.exists(certifi.where())
except ImportError:
    # certifi could not be imported; carry on without it
    has_certifi = False
  78
  79
  80 def register_socks_protocols():
  81     # "Register" SOCKS protocols
  82     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  83     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  84     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  85         if scheme not in compat_urlparse.uses_netloc:
  86             compat_urlparse.uses_netloc.append(scheme)
  87
  88
# This is not clearly defined otherwise, so take the type from an actual
# compiled (empty) pattern
compiled_regex_type = type(re.compile(''))
  91
  92
  93 def random_user_agent():
  94     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  95     _CHROME_VERSIONS = (
  96         '90.0.4430.212',
  97         '90.0.4430.24',
  98         '90.0.4430.70',
  99         '90.0.4430.72',
 100         '90.0.4430.85',
 101         '90.0.4430.93',
 102         '91.0.4472.101',
 103         '91.0.4472.106',
 104         '91.0.4472.114',
 105         '91.0.4472.124',
 106         '91.0.4472.164',
 107         '91.0.4472.19',
 108         '91.0.4472.77',
 109         '92.0.4515.107',
 110         '92.0.4515.115',
 111         '92.0.4515.131',
 112         '92.0.4515.159',
 113         '92.0.4515.43',
 114         '93.0.4556.0',
 115         '93.0.4577.15',
 116         '93.0.4577.63',
 117         '93.0.4577.82',
 118         '94.0.4606.41',
 119         '94.0.4606.54',
 120         '94.0.4606.61',
 121         '94.0.4606.71',
 122         '94.0.4606.81',
 123         '94.0.4606.85',
 124         '95.0.4638.17',
 125         '95.0.4638.50',
 126         '95.0.4638.54',
 127         '95.0.4638.69',
 128         '95.0.4638.74',
 129         '96.0.4664.18',
 130         '96.0.4664.45',
 131         '96.0.4664.55',
 132         '96.0.4664.93',
 133         '97.0.4692.20',
 134     )
 135     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 136
 137
# Names of the supported encodings. 'br' is only added when compat_brotli
# is truthy
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if compat_brotli:
    SUPPORTED_ENCODINGS.append('br')
 143
# Standard header values. random_user_agent() is called once, when this
# statement runs, so the User-Agent stays the same afterwards
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}
 150
 151
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
 155
 156
# Unique sentinel used as the default of `default=`/`is_id=` parameters, so
# that "argument not given" can be told apart from an explicit None
NO_DEFAULT = object()
 158
# Full English month names, January first
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list in calendar order
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
 169
# File extensions, written without the leading dot
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
 184
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its replacement. Replacements longer than
# one character ('AE', 'OE', 'TH', 'ss', ...) are given as one-item lists so
# that itertools.chain yields them whole instead of character by character
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 189
# Date/time format strings written with strftime/strptime-style directives.
# None of these depend on whether the day or the month is written first in
# an all-numeric date; those formats are added in the two lists derived from
# this tuple
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)
 234
# DATE_FORMATS followed by numeric formats that put the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS followed by numeric formats that put the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
 253
# Matches `}('<payload>',<int>,<int>,'<words>'.split('|')` and captures the
# payload, the two integers and the '|'-separated word list, in that order
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type is application/ld+json (optionally
# quoted); the element's content is captured in the `json_ld` group
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
 256
 257
 258 def preferredencoding():
 259     """Get preferred encoding.
 260
 261     Returns the best encoding scheme for the system, based on
 262     locale.getpreferredencoding() and some further tweaks.
 263     """
 264     try:
 265         pref = locale.getpreferredencoding()
 266         'TEST'.encode(pref)
 267     except Exception:
 268         pref = 'UTF-8'
 269
 270     return pref
 271
 272
 273 def write_json_file(obj, fn):
 274     """ Encode obj as JSON and write it to fn, atomically if possible """
 275
 276     tf = tempfile.NamedTemporaryFile(
 277         prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 278         suffix='.tmp', delete=False, mode='w', encoding='utf-8')
 279
 280     try:
 281         with tf:
 282             json.dump(obj, tf, ensure_ascii=False)
 283         if sys.platform == 'win32':
 284             # Need to remove existing file on Windows, else os.rename raises
 285             # WindowsError or FileExistsError.
 286             with contextlib.suppress(OSError):
 287                 os.unlink(fn)
 288         with contextlib.suppress(OSError):
 289             mask = os.umask(0)
 290             os.umask(mask)
 291             os.chmod(tf.name, 0o666 & ~mask)
 292         os.rename(tf.name, fn)
 293     except Exception:
 294         with contextlib.suppress(OSError):
 295             os.remove(tf.name)
 296         raise
 297
 298
 299 def find_xpath_attr(node, xpath, key, val=None):
 300     """ Find the xpath xpath[@key=val] """
 301     assert re.match(r'^[a-zA-Z_-]+$', key)
 302     expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
 303     return node.find(expr)
 304
 305 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 306 # the namespace parameter
 307
 308
 309 def xpath_with_ns(path, ns_map):
 310     components = [c.split(':') for c in path.split('/')]
 311     replaced = []
 312     for c in components:
 313         if len(c) == 1:
 314             replaced.append(c[0])
 315         else:
 316             ns, tag = c
 317             replaced.append('{%s}%s' % (ns_map[ns], tag))
 318     return '/'.join(replaced)
 319
 320
 321 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 322     def _find_xpath(xpath):
 323         return node.find(xpath)
 324
 325     if isinstance(xpath, (str, compat_str)):
 326         n = _find_xpath(xpath)
 327     else:
 328         for xp in xpath:
 329             n = _find_xpath(xp)
 330             if n is not None:
 331                 break
 332
 333     if n is None:
 334         if default is not NO_DEFAULT:
 335             return default
 336         elif fatal:
 337             name = xpath if name is None else name
 338             raise ExtractorError('Could not find XML element %s' % name)
 339         else:
 340             return None
 341     return n
 342
 343
 344 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 345     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 346     if n is None or n == default:
 347         return n
 348     if n.text is None:
 349         if default is not NO_DEFAULT:
 350             return default
 351         elif fatal:
 352             name = xpath if name is None else name
 353             raise ExtractorError('Could not find XML element\'s text %s' % name)
 354         else:
 355             return None
 356     return n.text
 357
 358
 359 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 360     n = find_xpath_attr(node, xpath, key)
 361     if n is None:
 362         if default is not NO_DEFAULT:
 363             return default
 364         elif fatal:
 365             name = f'{xpath}[@{key}]' if name is None else name
 366             raise ExtractorError('Could not find XML attribute %s' % name)
 367         else:
 368             return None
 369     return n.attrib[key]
 370
 371
 372 def get_element_by_id(id, html):
 373     """Return the content of the tag with the specified ID in the passed HTML document"""
 374     return get_element_by_attribute('id', id, html)
 375
 376
 377 def get_element_html_by_id(id, html):
 378     """Return the html of the tag with the specified ID in the passed HTML document"""
 379     return get_element_html_by_attribute('id', id, html)
 380
 381
 382 def get_element_by_class(class_name, html):
 383     """Return the content of the first tag with the specified class in the passed HTML document"""
 384     retval = get_elements_by_class(class_name, html)
 385     return retval[0] if retval else None
 386
 387
 388 def get_element_html_by_class(class_name, html):
 389     """Return the html of the first tag with the specified class in the passed HTML document"""
 390     retval = get_elements_html_by_class(class_name, html)
 391     return retval[0] if retval else None
 392
 393
 394 def get_element_by_attribute(attribute, value, html, escape_value=True):
 395     retval = get_elements_by_attribute(attribute, value, html, escape_value)
 396     return retval[0] if retval else None
 397
 398
 399 def get_element_html_by_attribute(attribute, value, html, escape_value=True):
 400     retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
 401     return retval[0] if retval else None
 402
 403
 404 def get_elements_by_class(class_name, html):
 405     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 406     return get_elements_by_attribute(
 407         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 408         html, escape_value=False)
 409
 410
 411 def get_elements_html_by_class(class_name, html):
 412     """Return the html of all tags with the specified class in the passed HTML document as a list"""
 413     return get_elements_html_by_attribute(
 414         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 415         html, escape_value=False)
 416
 417
 418 def get_elements_by_attribute(*args, **kwargs):
 419     """Return the content of the tag with the specified attribute in the passed HTML document"""
 420     return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 421
 422
 423 def get_elements_html_by_attribute(*args, **kwargs):
 424     """Return the html of the tag with the specified attribute in the passed HTML document"""
 425     return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 426
 427
def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Yield the text (content) and the html (whole) of every tag with the
    specified attribute in the passed HTML document

    @param attribute    Attribute name; always matched literally
    @param value        Attribute value to look for
    @param html         The HTML document to search
    @param escape_value Whether value is literal text (True) or already a
                        regular expression (False)
    """

    # The quote around the value is optional ('?') unless the value starts with
    # one of these characters, in which case the quote is required
    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    # Matches from the start of an opening tag up to the end of the wanted
    # attribute. (?-x:...) switches verbose mode off for the value, so that
    # whitespace inside it is matched instead of being ignored
    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        # Extract the complete element, starting where the opening tag was found
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            # Drop one pair of matching quotes around the content, if any,
            # before unescaping HTML entities
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )
 451
 452
 453 class HTMLBreakOnClosingTagParser(compat_HTMLParser):
 454     """
 455     HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
 456     closing tag for the first opening tag it has encountered, and can be used
 457     as a context manager
 458     """
 459
 460     class HTMLBreakOnClosingTagException(Exception):
 461         pass
 462
 463     def __init__(self):
 464         self.tagstack = collections.deque()
 465         compat_HTMLParser.__init__(self)
 466
 467     def __enter__(self):
 468         return self
 469
 470     def __exit__(self, *_):
 471         self.close()
 472
 473     def close(self):
 474         # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
 475         # so data remains buffered; we no longer have any interest in it, thus
 476         # override this method to discard it
 477         pass
 478
 479     def handle_starttag(self, tag, _):
 480         self.tagstack.append(tag)
 481
 482     def handle_endtag(self, tag):
 483         if not self.tagstack:
 484             raise compat_HTMLParseError('no tags in the stack')
 485         while self.tagstack:
 486             inner_tag = self.tagstack.pop()
 487             if inner_tag == tag:
 488                 break
 489         else:
 490             raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
 491         if not self.tagstack:
 492             raise self.HTMLBreakOnClosingTagException()
 493
 494
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)

    Raises compat_HTMLParseError when the opening tag or a closing tag that
    ends the element cannot be found.
    """
    def find_or_raise(haystack, needle, exc):
        # Like str.index, but raises the given exception when needle is absent
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    # Convert to an absolute index pointing just past the '>' of the opening tag
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        # '<{tag}' may also have matched a longer tag name; make sure the parser
        # really saw the requested tag
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            # Both positions are relative to offset
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                # Feed everything up to and including this closing tag. If it
                # only closed a nested element, carry on with the next one
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                # This closing tag ended the outermost element
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')
 528
 529
 530 class HTMLAttributeParser(compat_HTMLParser):
 531     """Trivial HTML parser to gather the attributes for a single element"""
 532
 533     def __init__(self):
 534         self.attrs = {}
 535         compat_HTMLParser.__init__(self)
 536
 537     def handle_starttag(self, tag, attrs):
 538         self.attrs = dict(attrs)
 539
 540
 541 class HTMLListAttrsParser(compat_HTMLParser):
 542     """HTML parser to gather the attributes for the elements of a list"""
 543
 544     def __init__(self):
 545         compat_HTMLParser.__init__(self)
 546         self.items = []
 547         self._level = 0
 548
 549     def handle_starttag(self, tag, attrs):
 550         if tag == 'li' and self._level == 0:
 551             self.items.append(dict(attrs))
 552         self._level += 1
 553
 554     def handle_endtag(self, tag):
 555         self._level -= 1
 556
 557
 558 def extract_attributes(html_element):
 559     """Given a string for an HTML element such as
 560     <el
 561          a="foo" B="bar" c="&98;az" d=boz
 562          empty= noval entity="&amp;"
 563          sq='"' dq="'"
 564     >
 565     Decode and return a dictionary of attributes.
 566     {
 567         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 568         'empty': '', 'noval': None, 'entity': '&',
 569         'sq': '"', 'dq': '\''
 570     }.
 571     """
 572     parser = HTMLAttributeParser()
 573     with contextlib.suppress(compat_HTMLParseError):
 574         parser.feed(html_element)
 575         parser.close()
 576     return parser.attrs
 577
 578
 579 def parse_list(webpage):
 580     """Given a string for an series of HTML <li> elements,
 581     return a dictionary of their attributes"""
 582     parser = HTMLListAttrsParser()
 583     parser.feed(webpage)
 584     parser.close()
 585     return parser.items
 586
 587
 588 def clean_html(html):
 589     """Clean an HTML snippet into a readable string"""
 590
 591     if html is None:  # Convenience for sanitizing descriptions etc.
 592         return html
 593
 594     html = re.sub(r'\s+', ' ', html)
 595     html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
 596     html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
 597     # Strip html tags
 598     html = re.sub('<.*?>', '', html)
 599     # Replace html entities
 600     html = unescapeHTML(html)
 601     return html.strip()
 602
 603
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename '-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            # Put stdout into binary mode
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    # At most two attempts: the name as given, then the sanitized name
    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except LockingUnsupportedError:
                # No locking available: fall back to a plain open()
                stream = open(filename, open_mode)
            return (stream, filename)
        except OSError as err:
            # Give up on the second attempt, and on EACCES right away
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            # Sanitizing changed nothing, so retrying would fail the same way
            if old_filename == filename:
                raise
 638
 639
 640 def timeconvert(timestr):
 641     """Convert RFC 2822 defined time string into system timestamp"""
 642     timestamp = None
 643     timetuple = email.utils.parsedate_tz(timestr)
 644     if timetuple is not None:
 645         timestamp = email.utils.mktime_tz(timetuple)
 646     return timestamp
 647
 648
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        # Substitute characters are prefixed with NUL ('\0') so that the cleanup
        # below can tell them apart from the same characters in the input
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    # Drop the NUL markers; fall back to '_' if nothing is left
    result = result.replace('\0', '') or '_'

    # NO_DEFAULT is truthy, so this runs only when is_id was given and is falsy
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
 696
 697
 698 def sanitize_path(s, force=False):
 699     """Sanitizes and normalizes path on Windows"""
 700     if sys.platform == 'win32':
 701         force = False
 702         drive_or_unc, _ = os.path.splitdrive(s)
 703     elif force:
 704         drive_or_unc = ''
 705     else:
 706         return s
 707
 708     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 709     if drive_or_unc:
 710         norm_path.pop(0)
 711     sanitized_path = [
 712         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 713         for path_part in norm_path]
 714     if drive_or_unc:
 715         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 716     elif force and s and s[0] == os.path.sep:
 717         sanitized_path.insert(0, os.path.sep)
 718     return os.path.join(*sanitized_path)
 719
 720
 721 def sanitize_url(url):
 722     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 723     # the number of unwanted failures due to missing protocol
 724     if url.startswith('//'):
 725         return 'http:%s' % url
 726     # Fix some common typos seen so far
 727     COMMON_TYPOS = (
 728         # https://github.com/ytdl-org/youtube-dl/issues/15649
 729         (r'^httpss://', r'https://'),
 730         # https://bx1.be/lives/direct-tv/
 731         (r'^rmtp([es]?)://', r'rtmp\1://'),
 732     )
 733     for mistake, fixup in COMMON_TYPOS:
 734         if re.match(mistake, url):
 735             return re.sub(mistake, fixup, url)
 736     return url
 737
 738
 739 def extract_basic_auth(url):
 740     parts = compat_urlparse.urlsplit(url)
 741     if parts.username is None:
 742         return url, None
 743     url = compat_urlparse.urlunsplit(parts._replace(netloc=(
 744         parts.hostname if parts.port is None
 745         else '%s:%d' % (parts.hostname, parts.port))))
 746     auth_payload = base64.b64encode(
 747         ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
 748     return url, 'Basic ' + auth_payload.decode('utf-8')
 749
 750
 751 def sanitized_Request(url, *args, **kwargs):
 752     url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
 753     if auth_header is not None:
 754         headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
 755         headers['Authorization'] = auth_header
 756     return compat_urllib_request.Request(url, *args, **kwargs)
 757
 758
 759 def expand_path(s):
 760     """Expand shell variables and ~"""
 761     return os.path.expandvars(compat_expanduser(s))
 762
 763
 764 def orderedSet(iterable):
 765     """ Remove all duplicates from the input iterable """
 766     res = []
 767     for el in iterable:
 768         if el not in res:
 769             res.append(el)
 770     return res
 771
 772
 773 def _htmlentity_transform(entity_with_semicolon):
 774     """Transforms an HTML entity to a character."""
 775     entity = entity_with_semicolon[:-1]
 776
 777     # Known non-numeric HTML entity
 778     if entity in compat_html_entities.name2codepoint:
 779         return compat_chr(compat_html_entities.name2codepoint[entity])
 780
 781     # TODO: HTML5 allows entities without a semicolon. For example,
 782     # '&Eacuteric' should be decoded as 'Éric'.
 783     if entity_with_semicolon in compat_html_entities_html5:
 784         return compat_html_entities_html5[entity_with_semicolon]
 785
 786     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 787     if mobj is not None:
 788         numstr = mobj.group(1)
 789         if numstr.startswith('x'):
 790             base = 16
 791             numstr = '0%s' % numstr
 792         else:
 793             base = 10
 794         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 795         with contextlib.suppress(ValueError):
 796             return compat_chr(int(numstr, base))
 797
 798     # Unknown entity in name, return its literal representation
 799     return '&%s;' % entity
 800
 801
 802 def unescapeHTML(s):
 803     if s is None:
 804         return None
 805     assert isinstance(s, str)
 806
 807     return re.sub(
 808         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 809
 810
 811 def escapeHTML(text):
 812     return (
 813         text
 814         .replace('&', '&amp;')
 815         .replace('<', '&lt;')
 816         .replace('>', '&gt;')
 817         .replace('"', '&quot;')
 818         .replace("'", '&#39;')
 819     )
 820
 821
 822 def process_communicate_or_kill(p, *args, **kwargs):
 823     try:
 824         return p.communicate(*args, **kwargs)
 825     except BaseException:  # Including KeyboardInterrupt
 826         p.kill()
 827         p.wait()
 828         raise
 829
 830
 831 class Popen(subprocess.Popen):
 832     if sys.platform == 'win32':
 833         _startupinfo = subprocess.STARTUPINFO()
 834         _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 835     else:
 836         _startupinfo = None
 837
 838     def __init__(self, *args, **kwargs):
 839         super().__init__(*args, **kwargs, startupinfo=self._startupinfo)
 840
 841     def communicate_or_kill(self, *args, **kwargs):
 842         return process_communicate_or_kill(self, *args, **kwargs)
 843
 844
 845 def get_subprocess_encoding():
 846     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 847         # For subprocess calls, encode with locale encoding
 848         # Refer to http://stackoverflow.com/a/9951851/35070
 849         encoding = preferredencoding()
 850     else:
 851         encoding = sys.getfilesystemencoding()
 852     if encoding is None:
 853         encoding = 'utf-8'
 854     return encoding
 855
 856
def encodeFilename(s, for_subprocess=False):
    """Return s unchanged after asserting that it is a str; for_subprocess is ignored"""
    assert isinstance(s, str)
    return s
 860
 861
def decodeFilename(b, for_subprocess=False):
    """Return b unchanged; for_subprocess is ignored"""
    return b
 864
 865
 866 def encodeArgument(s):
 867     # Legacy code that uses byte strings
 868     # Uncomment the following line after fixing all post processors
 869     # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 870     return s if isinstance(s, str) else s.decode('ascii')
 871
 872
def decodeArgument(b):
    """Return b unchanged"""
    return b
 875
 876
 877 def decodeOption(optval):
 878     if optval is None:
 879         return optval
 880     if isinstance(optval, bytes):
 881         optval = optval.decode(preferredencoding())
 882
 883     assert isinstance(optval, compat_str)
 884     return optval
 885
 886
 887 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 888
 889
 890 def timetuple_from_msec(msec):
 891     secs, msec = divmod(msec, 1000)
 892     mins, secs = divmod(secs, 60)
 893     hrs, mins = divmod(mins, 60)
 894     return _timetuple(hrs, mins, secs, msec)
 895
 896
 897 def formatSeconds(secs, delim=':', msec=False):
 898     time = timetuple_from_msec(secs * 1000)
 899     if time.hours:
 900         ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 901     elif time.minutes:
 902         ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
 903     else:
 904         ret = '%d' % time.seconds
 905     return '%s.%03d' % (ret, time.milliseconds) if msec else ret
 906
 907
 908 def _ssl_load_windows_store_certs(ssl_context, storename):
 909     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
 910     try:
 911         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
 912                  if encoding == 'x509_asn' and (
 913                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
 914     except PermissionError:
 915         return
 916     for cert in certs:
 917         with contextlib.suppress(ssl.SSLError):
 918             ssl_context.load_verify_locations(cadata=cert)
 919
 920
 921 def make_HTTPS_handler(params, **kwargs):
 922     opts_check_certificate = not params.get('nocheckcertificate')
 923     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 924     context.check_hostname = opts_check_certificate
 925     if params.get('legacyserverconnect'):
 926         context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
 927     context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
 928     if opts_check_certificate:
 929         if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
 930             context.load_verify_locations(cafile=certifi.where())
 931         else:
 932             try:
 933                 context.load_default_certs()
 934                 # Work around the issue in load_default_certs when there are bad certificates. See:
 935                 # https://github.com/yt-dlp/yt-dlp/issues/1060,
 936                 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
 937             except ssl.SSLError:
 938                 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
 939                 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
 940                     # Create a new context to discard any certificates that were already loaded
 941                     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 942                     context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
 943                     for storename in ('CA', 'ROOT'):
 944                         _ssl_load_windows_store_certs(context, storename)
 945                 context.set_default_verify_paths()
 946     return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 947
 948
 949 def bug_reports_message(before=';'):
 950     msg = ('please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , '
 951            'filling out the appropriate issue template. '
 952            'Confirm you are on the latest version using  yt-dlp -U')
 953
 954     before = before.rstrip()
 955     if not before or before.endswith(('.', '!', '?')):
 956         msg = msg[0].title() + msg[1:]
 957
 958     return (before + ' ' if before else '') + msg
 959
 960
 961 class YoutubeDLError(Exception):
 962     """Base exception for YoutubeDL errors."""
 963     msg = None
 964
 965     def __init__(self, msg=None):
 966         if msg is not None:
 967             self.msg = msg
 968         elif self.msg is None:
 969             self.msg = type(self).__name__
 970         super().__init__(self.msg)
 971
 972
 973 network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
 974 if hasattr(ssl, 'CertificateError'):
 975     network_exceptions.append(ssl.CertificateError)
 976 network_exceptions = tuple(network_exceptions)
 977
 978
 979 class ExtractorError(YoutubeDLError):
 980     """Error during info extraction."""
 981
 982     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
 983         """ tb, if given, is the original traceback (so that it can be printed out).
 984         If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
 985         """
 986         if sys.exc_info()[0] in network_exceptions:
 987             expected = True
 988
 989         self.orig_msg = str(msg)
 990         self.traceback = tb
 991         self.expected = expected
 992         self.cause = cause
 993         self.video_id = video_id
 994         self.ie = ie
 995         self.exc_info = sys.exc_info()  # preserve original exception
 996
 997         super().__init__(''.join((
 998             format_field(ie, template='[%s] '),
 999             format_field(video_id, template='%s: '),
1000             msg,
1001             format_field(cause, template=' (caused by %r)'),
1002             '' if expected else bug_reports_message())))
1003
1004     def format_traceback(self):
1005         return join_nonempty(
1006             self.traceback and ''.join(traceback.format_tb(self.traceback)),
1007             self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1008             delim='\n') or None
1009
1010
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported.

    The offending URL is kept in the `url` attribute.
    """

    def __init__(self, url):
        self.url = url
        message = 'Unsupported URL: %s' % url
        super().__init__(message, expected=True)
1016
1017
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match.

    Adds nothing to ExtractorError besides a distinct type.
    """
1021
1022
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always marked as expected, whatever `expected` value is passed in.
    """

    def __init__(self, msg, countries=None, **kwargs):
        self.countries = countries
        super().__init__(msg, **{**kwargs, 'expected': True})
1034
1035
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super().__init__(msg)
1048
1049
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict.
    """
    # Default message, used when the exception is created without one
    msg = 'Entry not found in info'
1057
1058
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        # The filename, when given, is appended to the default message
        message = self.msg if filename is None else self.msg + f': {filename}'
        super().__init__(message)
1071
1072
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task. It adds no attributes or
    behavior to YoutubeDLError.
    """
1079
1080
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Default message, used when the exception is created without one
    msg = 'The download was cancelled'
1084
1085
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Default message, used when the exception is created without one
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1089
1090
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    # Default message, used when the exception is created without one
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1094
1095
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Default message, used when the exception is created without one
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1099
1100
class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted.

    The `expected` flag passed to the constructor is stored on the instance.
    """

    def __init__(self, msg, expected=False):
        self.expected = expected
        super().__init__(msg)
1107
1108
class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate.

    Always created with the fixed message below and `expected` set to False.
    """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(msg=self.msg, expected=False)
1115
1116
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        # Details of the failure, when given, are appended to the default message
        message = self.msg if err is None else self.msg + f': {err}'
        super().__init__(message)
1129
1130
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both values are byte counts
        self.downloaded = downloaded
        self.expected = expected
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1144
1145
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style `code`, a message and a derived `reason`.

    `reason` is one of:
        'NO_SPACE'        - code is ENOSPC/EDQUOT, or the message mentions
                            'No space left' or 'Disk quota exceeded'
        'VALUE_TOO_LONG'  - code is E2BIG, or the message mentions
                            'Argument list too long'
        'NOT_SUPPORTED'   - anything else
    """

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Classify the failure from the code first, then from the message text
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
1160
1161
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDLError subclass that adds no attributes or behavior of its own."""
1164
1165
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class` and make it honour the 'source_address' param.

    The connection object is created as http_class(*args, **kwargs). When
    ydl_handler._params contains a non-None 'source_address', the connection
    gets a replacement `_create_connection` (only if it already has such an
    attribute) and its `source_address` is set to (source_address, 0).

    `is_https` is accepted but not used in this function.
    """
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the `source_address` parameter shadows the outer variable;
            # it is indexed below, so it is expected to be a (host, port) pair
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4; anything else is taken as IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connection is returned
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    # Remember the failure and close the socket before trying the next address
                    err = _
                    if sock is not None:
                        sock.close()
            # Every candidate failed: re-raise the last error, if there was one
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc
1211
1212
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to `headers`.

    Without the marker the very same mapping is returned. With it, a new dict
    is returned that lacks both the marker and any 'Accept-Encoding' header
    (matched case-insensitively); the input mapping is not modified.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    return {
        name: value for name, value in headers.items()
        if name != marker and name.lower() != 'accept-encoding'}
1221
1222
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped,
    deflated and brotli-compressed responses from web servers. If
    compression is to be avoided in a particular request, the original
    request in the program code only has to include the HTTP header
    "Youtubedl-no-compression", which will be removed before making the
    real request.

    Redirect responses additionally get their Location header
    percent-encoded.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store `params` and pass the remaining arguments to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req`, through a SOCKS connection class when requested.

        A truthy 'Ytdl-socks-proxy' request header selects the SOCKS proxy
        to use and is removed from the request before it is sent.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate `data`; empty input is returned as is."""
        if not data:
            return data
        try:
            # First attempt with negative wbits ...
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # ... and on failure retry with zlib's default settings
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        """Decompress brotli `data`; empty input is returned as is."""
        if not data:
            return data
        return compat_brotli.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request and return it.

        The URL is escaped, missing default headers and an Accept-encoding
        header are added, and the headers are passed through
        handle_youtubedl_headers().
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Headers already present on the request take precedence over the defaults
        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        """Decode a compressed response body and escape redirect locations.

        A response whose Content-encoding is exactly 'gzip', 'deflate' or
        'br' is replaced by an addinfourl object wrapping the decompressed
        body, and the Content-encoding header is removed. For 3xx responses
        the Location header is percent-encoded when necessary.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes removed, one more byte per attempt
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    # No amount of trimming helped: report the original failure
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through the same request/response processing
    https_request = http_request
    https_response = http_response
1351
1352
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of `base_class` that connects through a SOCKS proxy.

    `base_class` must be a subclass of HTTPConnection or HTTPSConnection.
    `socks_proxy` is the proxy URL; its scheme selects the protocol
    ('socks5', 'socks4a', or 'socks'/'socks4' for SOCKS4), the port defaults
    to 1080 and username/password, if present, are percent-decoded. Host
    names are resolved remotely by the proxy.

    Raises ValueError if the scheme of `socks_proxy` is not one of the above.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    socks_types = {
        'socks5': ProxyType.SOCKS5,
        'socks': ProxyType.SOCKS4,
        'socks4': ProxyType.SOCKS4,
        'socks4a': ProxyType.SOCKS4A,
    }
    # Fail with a clear message instead of an UnboundLocalError further down
    if scheme not in socks_types:
        raise ValueError(f'Unsupported SOCKS proxy scheme: {scheme!r}')
    socks_type = socks_types[scheme]

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, TLS is layered on top of the established proxy connection
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
1394
1395
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports a custom connection class and SOCKS proxies."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Store `params` and the connection class (HTTPSConnection by default).

        The remaining arguments are passed to HTTPSHandler.
        """
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open `req`, through a SOCKS connection class when requested.

        A truthy 'Ytdl-socks-proxy' request header selects the SOCKS proxy
        to use and is removed from the request before it is sent.
        """
        # Forward whichever of these the base handler has set up
        open_kwargs = {
            option: getattr(self, attribute)
            for option, attribute in (
                ('context', '_context'),  # python > 2.6
                ('check_hostname', '_check_hostname'),  # python 3.x
            )
            if hasattr(self, attribute)}

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(self._https_conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']
        else:
            conn_class = self._https_conn_class

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
1419
1420
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Cookie jar persisted as a Netscape-format cookie file; see [1] for the format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        The file is written as UTF-8. `filename` falls back to self.filename;
        ValueError is raised when neither is set. Cookies marked as discard
        or already expired are left out unless the matching ignore_* flag
        is set.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with open(filename, 'w', encoding='utf-8') as out:
            out.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if (not ignore_discard and cookie.discard
                        or not ignore_expires and cookie.is_expired(now)):
                    continue

                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value

                fields = (cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value)
                out.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        `filename` falls back to self.filename; ValueError is raised when
        neither is set. Malformed entries are skipped with a warning, and
        cookies whose `expires` is 0 are turned into session cookies.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Drop the HttpOnly marker so the entry is parsed like any other
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            expires_at = self._CookieFileEntry(*fields).expires_at
            if expires_at and not expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % expires_at)
            return line

        # Collect the acceptable lines in memory, then let the base class parse them
        buffer = io.StringIO()
        with open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    prepared = prepare_line(line)
                except compat_cookiejar.LoadError as e:
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                else:
                    buffer.write(prepared)
        buffer.seek(0)
        self._really_load(buffer, filename, ignore_discard, ignore_expires)

        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
1535
1536
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor whose HTTPS hooks are bound explicitly.

    Construction and response handling are delegated unchanged to
    HTTPCookieProcessor.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to HTTPCookieProcessor.http_response and return its result."""
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests use the base class's request hook; HTTPS responses use
    # the http_response defined above
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
1546
1547
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # All handled redirect codes share the base class's 302 implementation
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        safe_method_redirect = code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD")
        post_redirect = code in (301, 302, 303) and method == "POST"
        if not safe_method_redirect and not post_redirect:
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        target_url = newurl.replace(' ', '%20')

        # Headers describing the original request body are not carried over
        content_headers = ("content-length", "content-type")
        forwarded_headers = {}
        for name, value in req.headers.items():
            if name.lower() not in content_headers:
                forwarded_headers[name] = value

        return compat_urllib_request.Request(
            target_url, headers=forwarded_headers, origin_req_host=req.origin_req_host,
            unverifiable=True)
1596
1597
def extract_timezone(date_str):
    """Split a trailing timezone designator off a date string.

    Returns a tuple (offset, remaining_date_str) where offset is a
    datetime.timedelta. The offset is zero when no designator is found (the
    string is then returned unchanged) or when the designator is 'Z'.
    """
    match = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                   [ ]?                                          # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if not match:
        return datetime.timedelta(), date_str

    # Cut as many characters off the end as the designator is long
    date_str = date_str[:-len(match.group('tz'))]
    sign_char = match.group('sign')
    if not sign_char:
        return datetime.timedelta(), date_str

    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(match.group('hours')),
        minutes=direction * int(match.group('minutes')))
    return offset, date_str
1622
1623
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """
    Return a UNIX timestamp from the given date

    @param date_str   Date in the form YYYY-MM-DD<delimiter>HH:MM:SS; fractional
                      seconds are discarded
    @param delimiter  Separator between the date and the time part
    @param timezone   datetime.timedelta offset of date_str from UTC; when None
                      it is taken from date_str via extract_timezone
    @returns          Integer timestamp, or None if date_str is None or does
                      not match the expected layout
    """
    if date_str is None:
        return None

    without_fraction = re.sub(r'\.[0-9]+', '', date_str)
    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        utc_dt = datetime.datetime.strptime(
            without_fraction, f'%Y-%m-%d{delimiter}%H:%M:%S') - timezone
        return calendar.timegm(utc_dt.timetuple())
    except ValueError:
        return None
1639
1640
def date_formats(day_first=True):
    """Return the day-first format collection if day_first, else the month-first one"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1643
1644
def unified_strdate(date_str, day_first=True):
    """
    Return a string with the date in the format YYYYMMDD

    Commas, AM/PM markers (with a following alphabetic word) and a trailing
    UTC offset are stripped before parsing. Returns None if date_str is None
    or cannot be parsed.
    """
    if date_str is None:
        return None

    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to the RFC 2822 style parser from the email package
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if result is None else compat_str(result)
1667
1668
def unified_timestamp(date_str, day_first=True):
    """
    Return a UNIX timestamp parsed from a loosely formatted date string

    @param date_str   The date string; None yields None
    @param day_first  Selects which format collection of date_formats() is tried
    @returns          Integer timestamp, or None if nothing could be parsed
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE(review): 12 hours are added whenever "PM" occurs anywhere in the
    # string, including for "12:xx PM" - confirm whether that is intended
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The UTC offset was already cut out of date_str by extract_timezone,
        # so it has to be applied here just like in the strptime branch above
        return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds())
    return None
1698
1699
def determine_ext(url, default_ext='unknown_video'):
    """
    Guess the file extension from a URL

    The text after the last "." of the part before any "?" is returned when
    it is purely alphanumeric, or (minus trailing slashes) when it is one of
    KNOWN_EXTENSIONS. Otherwise default_ext is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
1711
1712
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by replacing the extension with "<sub_lang>.<sub_format>" """
    sub_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, sub_ext, expected_real_ext)
1715
1716
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Build a datetime object from a string that is either an absolute date
    (parsed with "format") or a relative expression of the form
    (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    "now", "today" and "yesterday" are based on the current UTC time.

    format: strptime format used for absolute dates
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.utcnow(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if relative is None:
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # The part before the sign is itself resolved recursively
    base = datetime_from_str(relative.group('start'), precision, format)
    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')

    if unit in ('month', 'year'):
        # Calendar-aware shift; automatic rounding then happens per day
        shifted = datetime_add_months(base, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit, amount = 'day', amount * 7
        shifted = base + datetime.timedelta(**{unit + 's': amount})

    return datetime_round(shifted, unit) if auto_precision else shifted
1757
1758
def date_from_str(date_str, format='%Y%m%d', strict=False):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    If "strict", only an 8 digit date or
    (now|today|yesterday)[[+-][0-9](day|week|month|year)(s)?] is allowed;
    anything else raises ValueError

    format: string date format used to return datetime object from
    """
    # The offset part is optional so that a bare "now"/"today"/"yesterday"
    # is accepted in strict mode as well
    if strict and not re.fullmatch(
            r'\d{8}|(now|today|yesterday)([+-]\d+(day|week|month|year)s?)?', date_str):
        raise ValueError(f'Invalid date format {date_str}')
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
1771
1772
def datetime_add_months(dt, months):
    """
    Shift a datetime object by a (possibly negative) number of months.

    The day is clamped to the last day of the target month.
    """
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
1780
1781
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to the nearest multiple of a unit

    precision: microsecond|second|minute|hour|day. With "microsecond" dt is
    returned unchanged; otherwise the result is rebuilt from a whole-second
    timestamp, with exact halves rounded up.
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    timestamp = calendar.timegm(dt.timetuple())
    step = seconds_per_unit[precision]
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
1798
1799
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not made of 8 digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
1808
1809
class DateRange:
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """
        start and end must be strings accepted by date_from_str in strict mode.
        A missing start/end leaves that side of the range open.
        Raises ValueError if start lies after end.
        """
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start, strict=True))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end, strict=True))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
1839
1840
def platform_name():
    """ Returns platform.platform() as a compat_str, decoding it first if it is bytes """
    name = platform.platform()
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
1849
1850
def get_windows_version():
    ''' Get Windows version as a version tuple. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
1857
1858
def write_string(s, out=None, encoding=None):
    """
    Write the str s to a stream (sys.stderr by default) and flush it

    Streams opened in binary mode and streams exposing a "buffer" attribute
    receive encoded bytes, with unencodable characters dropped; any other
    stream receives the string as is.
    """
    assert isinstance(s, str)
    stream = out or sys.stderr

    if 'b' in getattr(stream, 'mode', ''):
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        # Prefer the explicit encoding, then the one the stream declares
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(codec, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
1873
1874
def bytes_to_intlist(bs):
    """Convert a byte sequence to a list of integer byte values ([] for empty input)"""
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Elements are characters rather than ints
        return [ord(ch) for ch in bs]
    return list(bs)  # Python 3
1882
1883
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values into bytes (b'' for empty input)"""
    if not xs:
        return b''
    layout = '%dB' % len(xs)  # one unsigned char per element
    return compat_struct_pack(layout, *xs)
1888
1889
class LockingUnsupportedError(OSError):
    """Error signalling that file locking is unavailable; carries a fixed message"""

    msg = 'File locking is not supported on this platform'

    def __init__(self):
        super().__init__(self.msg)
1895
1896
# Cross-platform file locking
# Defines _lock_file(f, exclusive, block) and _unlock_file(f) with whichever
# primitive is available: LockFileEx/UnlockFileEx through ctypes on win32,
# fcntl elsewhere, or stubs that raise LockingUnsupportedError when fcntl
# cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Structure handed to LockFileEx/UnlockFileEx; Offset/OffsetHigh carry the
    # start of the byte range
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte count that is locked and unlocked
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        """Lock the file object f; raises BlockingIOError if LockFileEx reports failure"""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        # dwFlags: 0x2 is set for an exclusive lock and 0x1 when the call must
        # not block (LOCKFILE_EXCLUSIVE_LOCK / LOCKFILE_FAIL_IMMEDIATELY in the
        # Win32 API)
        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on f; raises OSError on failure"""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            """Lock f with flock(), falling back to lockf() if flock() raises OSError"""
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                # The lock is held elsewhere; do not retry through lockf()
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            """Unlock f with flock(), falling back to lockf() if flock() raises OSError"""
            try:
                fcntl.flock(f, fcntl.LOCK_UN)
            except OSError:
                fcntl.lockf(f, fcntl.LOCK_UN)

    except ImportError:

        # No fcntl module: locking is reported as unsupported
        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError()

        def _unlock_file(f):
            raise LockingUnsupportedError()
1980
1981
class locked_file:
    """
    Wrapper around a file object that is locked on entering the context (or
    on open()) and unlocked and closed on leaving it (or on close()).

    Read modes take a shared lock, all other modes an exclusive one. Unknown
    attributes and iteration are forwarded to the wrapped file object.
    """
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        can_write = any(flag in mode for flag in 'wax+')
        can_read = any(flag in mode for flag in 'r+')
        if not can_write:
            open_flags = os.O_RDONLY
        elif can_read:
            open_flags = os.O_RDWR
        else:
            open_flags = os.O_WRONLY

        # O_CLOEXEC is UNIX only, O_BINARY and O_NOINHERIT are Windows only;
        # a flag missing from the os module contributes nothing
        for optional in ('O_CLOEXEC', 'O_BINARY', 'O_NOINHERIT'):
            open_flags |= getattr(os, optional, 0)
        if can_write:
            open_flags |= os.O_CREAT  # O_TRUNC only after locking
        if 'a' in mode:
            open_flags |= os.O_APPEND
        if 'x' in mode:
            open_flags |= os.O_EXCL

        fd = os.open(filename, open_flags, 0o666)
        self.f = os.fdopen(fd, mode, encoding=encoding)

    def __enter__(self):
        try:
            _lock_file(self.f, 'r' not in self.mode, self.block)
            self.locked = True
        except OSError:
            # The lock could not be taken: do not leave the file open
            self.f.close()
            raise
        # Truncation is deferred until the lock is held
        if 'w' in self.mode:
            self.f.truncate()
        return self

    def unlock(self):
        """Release the lock if it is held; the file itself stays open"""
        if self.locked:
            try:
                _unlock_file(self.f)
            finally:
                self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()

    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
2038
2039
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
2043
2044
def shell_quote(args):
    """Return the arguments shell-quoted and joined by spaces into a single string"""
    fs_encoding = get_filesystem_encoding()
    return ' '.join(
        # We may get a filename encoded with 'encodeFilename'
        compat_shlex_quote(arg.decode(fs_encoding) if isinstance(arg, bytes) else arg)
        for arg in args)
2054
2055
def smuggle_url(url, data):
    """
    Pass additional data in a URL for internal use.

    The data is JSON-encoded into the "__youtubedl_smuggle" parameter of the
    URL fragment. If the URL already carries smuggled data, both are merged
    and the values already in the URL take precedence.

    @param url   The (possibly already smuggled) URL
    @param data  Dictionary of JSON-serialisable values; it is not modified
    @returns     The URL with the smuggled data appended
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so that the caller's dictionary is left untouched
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
2064
2065
def unsmuggle_url(smug_url, default=None):
    """
    Split a URL produced by smuggle_url into (url, data)

    URLs without smuggled data are returned unchanged together with default.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    plain_url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(payload)
2073
2074
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """
    Formats numbers with decimal suffixes like K, M, etc

    @param num     The number to format (anything float_or_none accepts)
    @param fmt     %-style format applied to the tuple (scaled number, suffix)
    @param factor  Ratio between consecutive suffixes; with 1024 the binary
                   suffixes (Ki, Mi, ...) are used
    @returns       The formatted string, or None if num is not a number or
                   is negative
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    if num == 0:
        exponent = 0
    else:
        # Clamp to the available suffixes. The lower bound matters for values
        # below 1/factor, whose negative exponent would otherwise index the
        # suffix list from its end
        exponent = min(max(int(math.log(num, factor)), 0), len(POSSIBLE_SUFFIXES))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
2087
2088
def format_bytes(bytes):
    """Format a byte count with two decimals and a binary suffix, or 'N/A' if it cannot be formatted"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
2091
2092
def lookup_unit_table(unit_table, s):
    """
    Parse "<number><unit>" at the start of s and scale it by the unit's multiplier

    @param unit_table  Mapping of unit name to multiplier
    @param s           The string; "," or "." is accepted as decimal separator
    @returns           The scaled value truncated to int, or None if s does
                       not start with a number followed by a known unit
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    multiplier = unit_table[found.group('unit')]
    number = float(found.group('num').replace(',', '.'))
    return int(number * multiplier)
2102
2103
def parse_filesize(s):
    """
    Parse a human readable file size such as "1.5 MiB" into a number of bytes

    Returns None if s is None or does not start with a number followed by a
    known unit.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_multipliers = {
        'B': 1, 'b': 1, 'bytes': 1,
        'KiB': 1024, 'KB': 1000, 'kB': 1024, 'Kb': 1000, 'kb': 1000,
        'kilobytes': 1000, 'kibibytes': 1024,
        'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, 'mb': 1000 ** 2,
        'megabytes': 1000 ** 2, 'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, 'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3, 'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, 'tb': 1000 ** 4,
        'terabytes': 1000 ** 4, 'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, 'pb': 1000 ** 5,
        'petabytes': 1000 ** 5, 'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, 'eb': 1000 ** 6,
        'exabytes': 1000 ** 6, 'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, 'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7, 'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, 'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8, 'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(unit_multipliers, s)
2173
2174
def parse_count(s):
    """
    Parse a count such as "1,234", "1.2M" or "views 15k" into an int

    Returns None if s is None or no number can be extracted.
    """
    if s is None:
        return None

    # Drop a leading run of non-digits that ends in whitespace
    s = re.sub(r'^[^\d]+\s', '', s).strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000, 'K': 1000,
        'm': 1000 ** 2, 'M': 1000 ** 2,
        'kk': 1000 ** 2, 'KK': 1000 ** 2,
        'b': 1000 ** 3, 'B': 1000 ** 3,
    }
    scaled = lookup_unit_table(multipliers, s)
    if scaled is not None:
        return scaled

    # Otherwise use a leading number that is followed by whitespace or the end
    leading = re.match(r'([\d,.]+)(?:$|\s)', s)
    return str_to_int(leading.group(1)) if leading else None
2202
2203
def parse_resolution(s, *, lenient=False):
    """
    Extract video dimensions from a string

    Recognises "<width>x<height>" (also with "X", "×" or ","), "<height>p" /
    "<height>i" and "4k" / "8k". With lenient, the width/height pair may be
    directly adjacent to other alphanumeric characters.

    @returns  Dictionary with "width" and "height", only "height", or no
              keys at all when nothing is recognised or s is None
    """
    if s is None:
        return {}

    dimensions_re = (
        r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)' if lenient
        else r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')
    dimensions = re.search(dimensions_re, s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scan_lines = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    # "4k" -> 2160 and "8k" -> 4320
    kilo = re.search(r'\b([48])[kK]\b', s)
    if kilo:
        return {'height': int(kilo.group(1)) * 540}

    return {}
2227
2228
def parse_bitrate(s):
    """Return the integer in front of "kbps" in s, or None if s is not a string or has no such number"""
    if not isinstance(s, compat_str):
        return
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
2235
2236
def month_by_name(name, lang='en'):
    """
    Return the number (1-12) of a month by its full name

    The names are taken from MONTH_NAMES for the given language, falling
    back to English for unknown languages. Unknown names yield None.
    """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
2246
2247
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month by its (locale-independent)
        three letter English abbreviation, or None if it is unknown """
    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
2256
2257
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity or a numeric
    character reference by '&amp;' in XML"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
2264
2265
def setproctitle(title):
    """
    Set the name of the current process through prctl() of libc.so.6

    Best effort: returns silently on Jython, if the library cannot be
    loaded or if it has no prctl().

    @param title  The new process name (a string)
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Create the buffer from the bytes themselves: ctypes then makes it one
    # item larger, so the string is NUL-terminated. A buffer sized to exactly
    # len(title_bytes) has no room for the terminator that prctl reads up to.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME in linux/prctl.h
    except AttributeError:
        return  # Strange libc, just skip this
2290
2291
def remove_start(s, start):
    """Return s without the prefix start; s is returned unchanged if it is None or has no such prefix"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
2294
2295
def remove_end(s, end):
    """
    Return s without the suffix end

    s is returned unchanged if it is None, does not end with the suffix or
    the suffix is empty.
    """
    # An empty suffix must be special-cased: s[:-0] would be the empty string
    if s is not None and s.endswith(end) and end:
        return s[:-len(end)]
    return s
2298
2299
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s, if present"""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
2307
2308
def get_domain(url):
    """Return the host part of url without scheme and leading "www.", or None if none is found"""
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
2312
2313
def url_basename(url):
    """Return the last segment of the path of url, ignoring surrounding slashes"""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
2317
2318
def base_url(url):
    """
    Return the http(s) URL up to and including the last "/" found before
    any "?", "#" or "&"

    Raises AttributeError if url does not have that form.
    """
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
2321
2322
def urljoin(base, path):
    """
    Join path onto base, tolerating invalid input

    Bytes arguments are decoded as UTF-8. A path that already has the form
    "[scheme:]//..." is returned as is. Returns None if path is empty or not
    a string, or if base is not an "[http(s):]//" URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not (isinstance(base, compat_str) and re.match(r'^(?:https?:)?//', base)):
        return None
    return compat_urlparse.urljoin(base, path)
2336
2337
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports 'HEAD'"""

    def get_method(self):
        return 'HEAD'
2341
2342
class PUTRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports 'PUT'"""

    def get_method(self):
        return 'PUT'
2346
2347
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """
    Convert v to an int, returning default if that is not possible

    @param scale     The result is floor-divided by this
    @param invscale  The result is multiplied by this (before the division)
    @param get_attr  If given, the attribute of that name is read from v
                     and converted instead
    """
    value = v
    if value is not None and get_attr:
        value = getattr(value, get_attr, None)
    try:
        converted = int(value) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
    return converted
2355
2356
def str_or_none(v, default=None):
    """Convert v to a string, or return default if v is None"""
    if v is None:
        return default
    return compat_str(v)
2359
2360
def str_to_int(int_str):
    """
    A more relaxed version of int_or_none

    Integers are returned as is; in strings ",", "." and "+" are removed
    before the conversion. Any other type yields None.
    """
    if isinstance(int_str, int):
        return int_str
    if isinstance(int_str, compat_str):
        return int_or_none(re.sub(r'[,\.\+]', '', int_str))
    return None
2368
2369
def float_or_none(v, scale=1, invscale=1, default=None):
    """
    Convert v to a float, returning default if v is None or not convertible

    The result is multiplied by invscale and divided by scale.
    """
    if v is None:
        return default
    try:
        number = float(v)
    except (ValueError, TypeError):
        return default
    return number * invscale / scale
2377
2378
def bool_or_none(v, default=None):
    """Return v if it is a bool, else default"""
    if isinstance(v, bool):
        return v
    return default
2381
2382
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a string, else default"""
    if isinstance(v, compat_str):
        return v.strip()
    return default
2385
2386
def url_or_none(url):
    """
    Return the stripped url if it starts with a supported scheme (http, https,
    rtmp and rtsp variants, mms, ftp, ftps) or is scheme-relative ("//...");
    None otherwise
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
2392
2393
def request_to_url(req):
    """Return the full URL of a Request object; anything else is returned unchanged"""
    if not isinstance(req, compat_urllib_request.Request):
        return req
    return req.get_full_url()
2399
2400
def strftime_or_none(timestamp, date_format, default=None):
    """
    Format a timestamp with strftime, returning default on failure

    @param timestamp    UNIX timestamp (int/float, interpreted as UTC) or a
                        date string in the form YYYYMMDD
    @param date_format  strftime format of the result
    @param default      Returned if timestamp has another type or cannot be
                        converted or formatted
    """
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # For unsupported types datetime_object is still None here and the
        # resulting AttributeError selects the default
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # utcfromtimestamp raises OverflowError/OSError for timestamps the
        # platform cannot represent
        return default
2411
2412
def parse_duration(s):
    """
    Parse a duration string into a number of seconds (float)

    Three notations are tried in turn: clock style "[[[DD:]HH:]MM:]SS[.ms]",
    unit style such as "3h 11m 53s" or "PT1H0.040S", and a single fractional
    amount of hours or minutes such as "2.5 hours". Returns None if s is not
    a string, is blank or matches none of them.
    """
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days = hours = mins = secs = ms = None
    clock = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if clock:
        days, hours, mins, secs, ms = clock.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        # Years, months and weeks are accepted by the pattern but not captured
        with_units = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if with_units:
            days, hours, mins, secs, ms = with_units.groups()
        else:
            single_unit = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not single_unit:
                return None
            hours, mins = single_unit.groups()

    if ms:
        # The clock notation also allows ":" in front of the fraction
        ms = ms.replace(':', '.')

    total = 0
    for part, seconds_per_unit in (
            (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)):
        total += float(part or 0) * seconds_per_unit
    return total
2467
2468
def prepend_extension(filename, ext, expected_real_ext=None):
    """
    Insert ext in front of the existing extension of filename

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return f'{filename}.{ext}'
    return f'{name}.{ext}{real_ext}'
2475
2476
def replace_extension(filename, ext, expected_real_ext=None):
    """
    Replace the extension of filename by ext

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return f'{stem}.{ext}'
2482
2483
def check_executable(exe, args=[]):
    """ Checks whether the given binary can be run, and returns its name (False otherwise).
    args can be a list of arguments for a short output (like -version) """
    try:
        process = Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate_or_kill()
    except OSError:
        return False
    return exe
2492
2493
def _get_exe_version_output(exe, args, *, to_screen=None):
    """
    Run exe with args and return its output (stderr merged into stdout)

    @param to_screen  Optional callable that is given a message announcing
                      the command
    @returns          The output, or False if the executable could not be run
    """
    if to_screen:
        to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process.communicate_or_kill()
    except OSError:
        return False
    # Python 2.x
    return out.decode('ascii', 'ignore') if isinstance(out, bytes) else out
2509
2510
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """
    Extract the version from the output of an executable

    @param version_re    Regex whose first group is the version; by default
                         the token following the word "version" is used
    @param unrecognized  Returned if the regex does not match
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
2520
2521
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present or printed nothing """
    out = _get_exe_version_output(exe, args)
    if not out:
        return False
    return detect_exe_version(out, version_re, unrecognized)
2528
2529
class LazyList(collections.abc.Sequence):
    """Sequence over an iterable that only pulls items when they are needed.

    Items that were already pulled are cached so they can be read again.
    Note that slicing a LazyList gives a plain list, not another LazyList.
    """

    class IndexError(IndexError):
        """Raised when an index cannot be satisfied by the underlying iterable"""

    def __init__(self, iterable, *, reverse=False, _cache=None):
        self.__iterable = iter(iterable)
        self.__reversed = reverse
        self.__cache = _cache if _cache is not None else []

    def __iter__(self):
        if not self.__reversed:
            # Replay what is cached, then keep pulling (and caching) new items
            yield from self.__cache
            for item in self.__iterable:
                self.__cache.append(item)
                yield item
            return
        # Reverse iteration is only possible once everything has been consumed
        yield from self.exhaust()

    def __exhaust(self):
        self.__cache.extend(self.__iterable)
        # Drop the emptied iterable so that the object becomes pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        """Evaluate the entire iterable and return its items as a new list"""
        direction = -1 if self.__reversed else 1
        return self.__exhaust()[::direction]

    @staticmethod
    def __reverse_index(x):
        if x is None:
            return None
        return -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start = stop = idx
            step = 0
        elif isinstance(idx, slice):
            if self.__reversed:
                idx = slice(
                    self.__reverse_index(idx.start),
                    self.__reverse_index(idx.stop),
                    -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        else:
            raise TypeError('indices must be integers or slices')

        needs_everything = (
            (start or 0) < 0 or (stop or 0) < 0
            or (start is None and step < 0)
            or (stop is None and step > 0))
        if needs_everything:
            # Indexing relative to the end requires consuming the whole iterable.
            # Obviously, never do this with infinite iterables
            self.__exhaust()
        else:
            missing = max(start or 0, stop or 0) - len(self.__cache) + 1
            if missing > 0:
                self.__cache.extend(itertools.islice(self.__iterable, missing))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Probing a single item is enough; no need to consume everything
        probe = -1 if self.__reversed else 0
        try:
            self[probe]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        return len(self.__exhaust())

    def __reversed__(self):
        flipped = not self.__reversed
        return type(self)(self.__iterable, reverse=flipped, _cache=self.__cache)

    def __copy__(self):
        same_direction = self.__reversed
        return type(self)(self.__iterable, reverse=same_direction, _cache=self.__cache)

    def __repr__(self):
        # repr and str mimic those of a list, which means exhausting the iterable
        items = self.exhaust()
        return repr(items)

    def __str__(self):
        items = self.exhaust()
        return repr(items)
2618
2619
class PagedList:
    """Base class for lists whose entries are produced page by page.

    `pagefunc(pagenum)` must return an iterable with the entries of that page.
    Subclasses decide how pages are walked by implementing `_getslice`.
    """

    class IndexError(IndexError):
        """Raised when indexing past the available entries"""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}
        # Unknown until a subclass learns otherwise
        self._pagecount = float('inf')

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        """Return the entries of page `pagenum` as a list, consulting the cache first"""
        page_results = self._cache.get(pagenum)
        if page_results is None:
            if pagenum > self._pagecount:
                page_results = []
            else:
                page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list"""
        entries = self._getslice(start, end)
        return list(entries)

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not (isinstance(idx, int) and idx >= 0):
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if entries:
            return entries[0]
        raise self.IndexError()
2658
2659
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        """Yield the entries in [start, end), fetching pages as they are needed.

        @param start  Index of the first entry to yield
        @param end    Index one past the last entry to yield, or None for "until the end"

        If fetching a page raises, `_pagecount` is lowered to the previous page
        number before the exception is re-raised.
        """
        # An empty range must not yield anything. Without this check, an empty
        # range ending on a page boundary (e.g. start == end == 0) computes
        # endv == pagesize, yields the whole page and then keeps paging.
        if end is not None and end <= start:
            return

        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets within this page; only the first/last page of the range is cut
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
2699
2700
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        """Yield the entries in [start, end) without going past the known page count.

        @param start  Index of the first entry to yield
        @param end    Index one past the last entry to yield, or None for "until the end"
        """
        # An empty range yields nothing and needs no page at all. Without this
        # check, start > end makes only_more negative, so the [:only_more]
        # slice below would still yield entries.
        if end is not None and end <= start:
            return

        start_page = start // self._pagesize
        end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
        # Entries to drop from the beginning of the first page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still to be yielded (None means no limit)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
2725
2726
def uppercase_escape(s):
    r"""Replace every \UXXXXXXXX escape sequence in `s` with the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        decoded, _ = decode(match.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
2733
2734
def lowercase_escape(s):
    r"""Replace every \uXXXX escape sequence in `s` with the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        decoded, _ = decode(match.group(0))
        return decoded

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
2741
2742
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Characters that are left as they are
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return urllib.parse.quote(s, safe=safe_chars)
2746
2747
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded; every other component is percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    )
    return escaped.geturl()
2758
2759
def parse_qs(url):
    """Parse the query string of `url`"""
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
2762
2763
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, one per line, and close it afterwards.

    Blank lines and lines starting with "#", ";" or "]" are dropped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a byte order mark from the start of the line
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url[0] in '#;]':
            return False
        # "#" may be part of the URI, so it cannot be treated as a comment
        # marker everywhere. It is safe to do so when it follows whitespace
        before_comment = re.split(r'\s#', url, 1)[0]
        return before_comment.rstrip()

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
        return urls
2781
2782
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data and return the result as ASCII bytes"""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
2785
2786
def update_url_query(url, query):
    """Return `url` with the parameters in `query` merged into its query string.

    Parameters already present in the URL are overridden by those in `query`.
    A falsy `query` leaves the URL untouched.
    """
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed_url.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parsed_url._replace(query=new_query))
2795
2796
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request based on `req` with the given parts replaced or extended.

    @param url      Replacement URL (the URL of `req` is used if falsy)
    @param data     Replacement data (the data of `req` is used if falsy)
    @param headers  Headers added on top of a copy of those of `req`
    @param query    Query parameters merged into the URL
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)

    # HEAD and PUT have dedicated request classes so that the method is kept
    method = req.get_method()
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(method, compat_urllib_request.Request)

    new_req = request_class(
        new_url, data=new_data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
2815
2816
def _multipart_encode_impl(data, boundary):
    """Encode `data` as multipart/form-data using the given boundary.

    @returns  (body bytes, Content-Type header value)
    @raises ValueError  if the boundary occurs inside any of the encoded fields
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
2837
2838
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is used as is; a clash with the data is an error
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not clash with the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
2867
2868
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value found in `d` under any of the given keys.

    None values are always skipped; other falsy values are skipped too
    unless `skip_false_values` is False. `default` is returned if nothing fits.
    """
    lookup = d.get
    for key in variadic(key_or_keys):
        val = lookup(key)
        if val is None:
            continue
        if val or not skip_false_values:
            return val
    return default
2874
2875
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn and return the first acceptable result.

    A function that raises AttributeError, KeyError, TypeError, IndexError or
    ZeroDivisionError is skipped, as is a result that is not an instance of
    `expected_type` (when given). None is returned if no function qualifies.
    """
    ignored_errors = (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError)
    for func in funcs:
        try:
            result = func(*args, **kwargs)
        except ignored_errors:
            continue
        if expected_type is not None and not isinstance(result, expected_type):
            continue
        return result
2885
2886
def try_get(src, getter, expected_type=None):
    """Apply one or more getter functions to `src` via try_call and return the first acceptable result"""
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)
2889
2890
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict holding the items of `dct` for which `cndn(key, value)` is truthy.

    By default, items whose value is None are dropped.
    """
    kept = {}
    for key, value in dct.items():
        if cndn(key, value):
            kept[key] = value
    return kept
2893
2894
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to the values of earlier ones.

    None values never make it into the result, and an empty string that was
    already merged may still be replaced by a later string value.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if key not in merged:
                if value is not None:
                    merged[key] = value
            elif isinstance(value, str) and merged[key] == '':
                merged[key] = value
    return merged
2903
2904
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as is if it is already a compat_str; otherwise decode it with `encoding`"""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2907
2908
# Maps a US rating label to the age limit that parse_age_limit() returns for it
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
2916
2917
# Maps a "TV-..." rating label to the age limit that parse_age_limit() returns for it.
# The part of each key after "TV-" is also used by parse_age_limit() to build its regex
# NOTE(review): 'TV-PG' maps to 0, the same as 'TV-G' - confirm this is intended
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
2926
2927
def parse_age_limit(s):
    """Convert an age limit given as an int, a string like "18+" or a rating label to an int.

    None is returned for anything that cannot be interpreted.
    """
    # isinstance(False, int) is True, so the exact type has to be compared instead
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, str):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, rating)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
2944
2945
def strip_jsonp(code):
    """Remove a JSONP callback wrapper from `code`, leaving only the callback's argument"""
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
2954
2955
def js_to_json(code, vars={}):
    """Rewrite a JavaScript object/array literal so that it can be parsed as JSON.

    @param vars  Dict mapping identifier names to the text they are replaced with
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    # (regex, base) pairs for integer literals that JSON cannot express
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )
    # Replacements applied inside string literals
    STRING_ESCAPES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_string_escape(m):
        sequence = m.group(0)
        return STRING_ESCAPES.get(sequence, sequence)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token in ('undefined', 'void 0'):
            return 'null'
        # Comments, "!" prefixes and trailing commas are dropped
        if token == ',' or token.startswith(('/*', '//', '!')):
            return ""

        if token[0] not in ("'", '"'):
            for regex, base in INTEGER_TABLE:
                int_match = re.match(regex, token)
                if not int_match:
                    continue
                number = int(int_match.group(1), base)
                if token.endswith(':'):
                    return '"%d":' % number
                return '%d' % number

            if token in vars:
                return vars[token]
            return '"%s"' % token

        # Quoted string: normalize the quoting and the escape sequences
        contents = re.sub(r'(?s)\\.|"', fix_string_escape, token[1:-1])
        return '"%s"' % contents

    code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
3004
3005
def qualities(quality_ids):
    """Return a function mapping a quality id to its position in `quality_ids`.

    Ids that are not part of `quality_ids` map to -1.
    """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
3014
3015
# NOTE(review): presumably the stages at which a postprocessor may be scheduled - confirm against callers
POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}


# Default output templates, keyed by output template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types.
# NOTE(review): the value looks like a fixed filename suffix for that type (None if there is none) - confirm
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# The template below has two positional placeholders that must be filled in
# (e.g. with str.format): {0} is the pattern of the mapping key and {1} the
# pattern of the conversion type
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''


# The conversion type characters of %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3055
3056
def limit_length(s, length):
    """Shorten `s` to `length` characters, ending it in an ellipsis if it was cut.

    None is passed through unchanged.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    kept = s[:length - len(ELLIPSES)]
    return kept + ELLIPSES
3065
3066
def version_tuple(v):
    """Split a version string on "." and "-" and return its parts as a tuple of ints"""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
3069
3070
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether `version` is older than `limit`.

    When `version` is missing or either version cannot be parsed, the version
    is treated as new if `assume_new` is set and as outdated otherwise.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, minimum = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < minimum
3078
3079
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Imported here rather than at module level
    from .update import is_non_updateable

    non_updateable = is_non_updateable()
    return not non_updateable
3086
3087
def args_to_str(args):
    """Get a short string representation for a subprocess command"""
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
3091
3092
def error_to_compat_str(err):
    """Return the string form of `err`"""
    message = str(err)
    return message
3095
3096
def error_to_str(err):
    """Format `err` as "<name of its type>: <its message>" """
    type_name = type(err).__name__
    return '{}: {}'.format(type_name, err)
3099
3100
def mimetype2ext(mt):
    """Guess a file extension from a MIME type.

    @param mt  MIME type, optionally followed by ";"-separated parameters, or None
    @returns   The extension without a leading dot, or None if `mt` is None.
               Unknown types fall back to their subtype with "+" replaced by "."

    The type is matched case-insensitively, in this order: the full type,
    the subtype, and finally the structured syntax suffix (the part after "+").
    """
    if mt is None:
        return None

    # Parameters (e.g. "; charset=utf-8") play no role in the lookup. Media type
    # and subtype names are case-insensitive, so normalize once for all lookups
    mt = mt.partition(';')[0].strip().lower()

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }

    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }

    _, _, subtype = mt.rpartition('/')
    ext = SUBTYPE_MAP.get(subtype)
    if ext is not None:
        return ext

    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }

    _, _, suffix = subtype.partition('+')
    ext = SUFFIX_MAP.get(suffix)
    if ext is not None:
        return ext

    return subtype.replace('+', '.')
3163
3164
def ext2mimetype(ext_or_url):
    """Guess the MIME type for a bare extension or for something containing a filename.

    None is returned for falsy input.
    """
    if not ext_or_url:
        return None
    # A bare extension is turned into a dummy filename so that it can be guessed
    target = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    mime_type, _ = mimetypes.guess_type(target)
    return mime_type
3171
3172
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video, audio and text codecs.

    @returns  A dict with 'vcodec', 'acodec' and 'dynamic_range' (plus 'tcodec'
              if a text codec was found); or {} if nothing could be determined.
              Only the first codec of each kind is used.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = (
        'avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
        'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = (
        'flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
        'dtsc', 'dtse', 'dtsh', 'dtsl')
    TEXT_CODECS = ('stpp', 'wvtt')

    stripped = map(str.strip, codecs_str.strip().strip(',').split(','))
    split_codecs = [entry for entry in stripped if entry]

    vcodec = acodec = tcodec = hdr = None
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        # Zeroes are removed so that e.g. "vp09" and "av01" are recognized
        codec = parts[0].replace('0', '')
        if codec in VIDEO_CODECS:
            if vcodec:
                continue
            vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
            if codec in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif (codec == 'av1' and len(parts) > 3 and parts[3] == '10'
                    or full_codec.replace('0', '').startswith('vp9.2')):
                hdr = 'HDR10'
        elif codec in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        elif codec in TEXT_CODECS:
            if not tcodec:
                tcodec = full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')

    if vcodec or acodec or tcodec:
        result = {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
        if tcodec is not None:
            result['tcodec'] = tcodec
        return result
    if len(split_codecs) == 2:
        # Nothing was recognized; assume the order "video, audio"
        video, audio = split_codecs
        return {
            'vcodec': video,
            'acodec': audio,
        }
    return {}
3214
3215
def urlhandle_detect_ext(url_handle):
    """Determine a file extension from the headers of a URL handle.

    The filename of an "attachment" Content-Disposition is preferred;
    otherwise the extension is derived from the Content-Type.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    attachment = re.match(
        r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition) if disposition else None
    if attachment:
        ext = determine_ext(attachment.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'))
3228
3229
def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI holding `data` with the given MIME type"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3232
3233
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no age limit is set,
    # or when the content is available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
3242
3243
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks are listed before the UTF-16 ones since
    # the UTF-32-LE mark starts with the UTF-16-LE one
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a byte order mark the data is taken to be UTF-8
    encoding, payload = 'utf-8', first_bytes
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = enc, first_bytes[len(bom):]
            break

    text = payload.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
3262
3263
def determine_protocol(info_dict):
    """Work out the protocol for the given info dict.

    An explicit 'protocol' entry wins; otherwise the protocol is derived from
    how the URL starts, then from its extension, and finally from its scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    for manifest_ext in ('m3u8', 'f4m'):
        if ext == manifest_ext:
            return manifest_ext

    return compat_urllib_parse_urlparse(url).scheme
3284
3285
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned

    @param delim       If given, a line made of this string is put below the header
    @param extra_gap   Additional spacing between the columns
    @param hide_empty  Drop the columns that are empty in every row of data
    """
    def visible_width(string):
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def column_widths(rows):
        return [max(visible_width(str(value)) for value in column) for column in zip(*rows)]

    def keep_columns(row, mask):
        kept = []
        for take, col in itertools.zip_longest(mask, row, fillvalue=True):
            if take:
                kept.append(col)
        return kept

    # A width of 0 marks an empty column; with an empty mask everything is kept
    mask = column_widths(data) if hide_empty else []
    header_row = keep_columns(header_row, mask)
    data = [keep_columns(row, mask) for row in data]

    table = [header_row] + data
    max_lens = column_widths(table)
    extra_gap += 1
    if delim:
        rule = [delim * (ml + extra_gap) for ml in max_lens]
        rule[-1] = rule[-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
        table = [header_row, rule] + data

    for row in table:
        for pos, cell in enumerate(row):
            text = str(cell)
            slack = max_lens[pos] - visible_width(text)
            if '\t' in text:
                # The padding goes where the tab is, pushing what follows to the right
                row[pos] = text.replace('\t', ' ' * slack) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (slack + extra_gap)
    return '\n'.join(''.join(row).rstrip() for row in table)
3316
3317
3318 def _match_one(filter_part, dct, incomplete):
3319     # TODO: Generalize code with YoutubeDL._build_format_filter
3320     STRING_OPERATORS = {
3321         '*=': operator.contains,
3322         '^=': lambda attr, value: attr.startswith(value),
3323         '$=': lambda attr, value: attr.endswith(value),
3324         '~=': lambda attr, value: re.search(value, attr),
3325     }
3326     COMPARISON_OPERATORS = {
3327         **STRING_OPERATORS,
3328         '<=': operator.le,  # "<=" must be defined above "<"
3329         '<': operator.lt,
3330         '>=': operator.ge,
3331         '>': operator.gt,
3332         '=': operator.eq,
3333     }
3334
3335     if isinstance(incomplete, bool):
3336         is_incomplete = lambda _: incomplete
3337     else:
3338         is_incomplete = lambda k: k in incomplete
3339
3340     operator_rex = re.compile(r'''(?x)\s*
3341         (?P<key>[a-z_]+)
3342         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3343         (?:
3344             (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3345             (?P<strval>.+?)
3346         )
3347         \s*$
3348         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3349     m = operator_rex.search(filter_part)
3350     if m:
3351         m = m.groupdict()
3352         unnegated_op = COMPARISON_OPERATORS[m['op']]
3353         if m['negation']:
3354             op = lambda attr, value: not unnegated_op(attr, value)
3355         else:
3356             op = unnegated_op
3357         comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3358         if m['quote']:
3359             comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3360         actual_value = dct.get(m['key'])
3361         numeric_comparison = None
3362         if isinstance(actual_value, (int, float)):
3363             # If the original field is a string and matching comparisonvalue is
3364             # a number we should respect the origin of the original field
3365             # and process comparison value as a string (see
3366             # https://github.com/ytdl-org/youtube-dl/issues/11082)
3367             try:
3368                 numeric_comparison = int(comparison_value)
3369             except ValueError:
3370                 numeric_comparison = parse_filesize(comparison_value)
3371                 if numeric_comparison is None:
3372                     numeric_comparison = parse_filesize(f'{comparison_value}B')
3373                 if numeric_comparison is None:
3374                     numeric_comparison = parse_duration(comparison_value)
3375         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3376             raise ValueError('Operator %s only supports string values!' % m['op'])
3377         if actual_value is None:
3378             return is_incomplete(m['key']) or m['none_inclusive']
3379         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3380
3381     UNARY_OPERATORS = {
3382         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3383         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3384     }
3385     operator_rex = re.compile(r'''(?x)\s*
3386         (?P<op>%s)\s*(?P<key>[a-z_]+)
3387         \s*$
3388         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3389     m = operator_rex.search(filter_part)
3390     if m:
3391         op = UNARY_OPERATORS[m.group('op')]
3392         actual_value = dct.get(m.group('key'))
3393         if is_incomplete(m.group('key')) and actual_value is None:
3394             return True
3395         return op(actual_value)
3396
3397     raise ValueError('Invalid filter part %r' % filter_part)
3398
3399
def match_str(filter_str, dct, incomplete=False):
    """ Evaluate a filter expression against a dictionary.

    The expression is split on every '&' that is not escaped with a
    backslash; each resulting part (with r'\&' turned back into '&') is
    checked on its own and all of them have to pass.

    @param filter_str  The filter expression
    @param dct         The dictionary to test
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    @returns           Whether the filter passes
    """
    for raw_part in re.split(r'(?<!\\)&', filter_str):
        condition = raw_part.replace(r'\&', '&')
        # Stop at the first failing condition, exactly like all() would
        if not _match_one(condition, dct, incomplete):
            return False
    return True
3410
3411
def match_filter_func(filters):
    """ Build a callback that checks an info dict against one or more filters.

    @param filters  A filter string or a collection of filter strings.
                    A falsy value disables filtering
    @returns        None if there are no filters; otherwise a function that
                    returns None when at least one filter passes, or a
                    message explaining why the video is skipped
    """
    if not filters:
        return None
    filters = variadic(filters)

    def _match_func(info_dict, *args, **kwargs):
        # The filters are alternatives: one passing filter is enough
        for filter_expr in filters:
            if match_str(filter_expr, info_dict, *args, **kwargs):
                return None

        video_title = info_dict.get('title') or info_dict.get('id') or 'video'
        filter_str = ') | ('.join(map(str.strip, filters))
        return f'{video_title} does not pass filter ({filter_str}), skipping ..'

    return _match_func
3425
3426
def parse_dfxp_time_expr(time_expr):
    """ Convert a DFXP/TTML time expression to seconds.

    Two forms are recognised: a plain (optionally fractional) number with an
    optional 's' suffix, and a clock value 'H:MM:SS' whose fraction may be
    introduced by either '.' or ':'.

    @param time_expr  The time expression string (may be empty or None)
    @returns          The time in seconds, or None if the expression is
                      empty or in neither of the recognised forms
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match['time_offset'])

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        # A ':' before the fraction is treated as a decimal point
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
3438
3439
def srt_subtitles_timecode(seconds):
    """ Format a time given in seconds as an SRT timecode (HH:MM:SS,mmm) """
    time_parts = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % time_parts
3442
3443
def ass_subtitles_timecode(seconds):
    """ Format a time given in seconds as an ASS timecode (H:MM:SS.cc) """
    time_parts = timetuple_from_msec(seconds * 1000)
    # ASS uses hundredths of a second; '%02d' truncates the fractional part
    centiseconds = time_parts.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*time_parts[:-1], centiseconds)
3447
3448
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT.

    Legacy TTML namespace URIs are rewritten to the current ones before the
    document is parsed. Every <p> element that has a begin time and either an
    end time or a duration becomes one SRT cue. The styling attributes listed
    in SUPPORTED_STYLING are rendered as <b>, <i>, <u> and <font> markup.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # (current namespace, [legacy namespaces that are rewritten to it])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are carried over to the SRT output
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled in by the resolution loop below
    styles = {}
    # Properties taken from the styles referenced by <body>/<div>
    default_style = {}

    class TTMLPElementParser:
        # Parser target that turns the content of one <p> element into SRT text.
        #
        # NOTE(review): _unclosed_elements and _applied_styles are class
        # attributes that are mutated in place, so they are shared by every
        # instance created during one dfxp2srt() call. end() only pops
        # _applied_styles for elements that emitted tags, so an entry can
        # outlive the paragraph that pushed it and suppress identical
        # properties in later paragraphs. The output depends on this; confirm
        # the intended behaviour before turning them into instance attributes.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then the referenced style, then
                # inline tts:* attributes (later sources override earlier ones)
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties that are already in effect
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    # All font attributes are merged into a single <font> tag,
                    # which is opened after any <b>/<i>/<u> tags
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the node and feed it through the target parser above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve the <style> definitions into flat property dicts. A style may
    # reference a parent that is declared later in the document, so keep
    # making passes for as long as a pass resolves something new.
    ignore_missing_parents = False
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id in styles:
                    styles[style_id] = styles[parent_style_id].copy()
                elif not ignore_missing_parents:
                    repeat = True
                    continue
            # Register the style even when it has no supported properties, so
            # that styles inheriting from it can still be resolved
            style_props = styles.setdefault(style_id, {})
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    style_props[prop] = prop_val
        if not repeat:
            break
        if len(styles) == resolved_before:
            # Nothing new was resolved, so the remaining parent references are
            # dangling or cyclic and would make this loop spin forever. Do one
            # last pass that treats those styles as having no parent.
            ignore_missing_parents = True

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The cue number counts every <p>, including the ones skipped below
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
3611
3612
def cli_option(params, command_option, param, separator=None):
    """ Build the command-line arguments for an option that takes a value.

    @param params          Mapping of parameter names to values
    @param command_option  The option to emit, e.g. '--proxy'
    @param param           Key of the value in params
    @param separator       If not None, emit a single argument of the form
                           command_option + separator + value instead of
                           two separate arguments
    @returns               [] if the value is missing or None, otherwise
                           the list of arguments with the value as a string
    """
    value = params.get(param)
    if value is None:
        return []
    # Stringify every non-None value, including falsy ones such as 0, so the
    # result never contains a non-string argument
    value = str(value)
    if separator is None:
        return [command_option, value]
    return [f'{command_option}{separator}{value}']
3618
3619
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the command-line arguments for an option that takes a boolean value.

    @param params          Mapping of parameter names to values
    @param command_option  The option to emit
    @param param           Key of the (boolean) value in params
    @param true_value      Text emitted when the value is True
    @param false_value     Text emitted when the value is False
    @param separator       If truthy, emit a single argument of the form
                           command_option + separator + text
    @returns               [] if the value is missing or None, otherwise
                           the list of arguments
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    text = true_value if flag else false_value
    if not separator:
        return [command_option, text]
    return [command_option + separator + text]
3628
3629
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Build the command-line arguments for an option that takes no value.

    @returns  [command_option] if params[param] equals expected_value,
              otherwise []
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
3633
3634
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """ Pick the extra command-line arguments configured for the given keys.

    @param argdict     Dict mapping lower-case keys to lists of arguments.
                       A list/tuple is accepted for backward compatibility
                       and None means that nothing is configured
    @param keys        List/tuple of keys in order of priority. An entry may
                       itself be a collection of keys, in which case the
                       arguments of all of those keys are concatenated
    @param default     Returned when nothing matches
    @param use_compat  Whether a list/tuple argdict is returned as-is
                       (otherwise it is ignored)
    @returns           The arguments of the first entry of keys that has any
                       configured, or default
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_group in keys:
        configured = [argdict.get(name.lower()) for name in variadic(key_group)]
        present = [args for args in configured if args is not None]
        if not present:
            continue
        merged = []
        for args in present:
            merged.extend(args)
        return merged
    return default
3653
3654
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """ Look up the arguments configured for an executable of a component.

    The root key is the lower-cased exe when it equals the lower-cased
    main_key, and '<main_key>+<exe>' otherwise. Each entry of keys is a
    suffix appended to the root key. If the bare root key is among the
    candidates, the (main_key, exe) pair (only when they differ) and
    'default' are added as fallbacks; otherwise use_compat is disabled.

    @returns  The result of cli_configuration_args for the candidate keys
    """
    main_key = main_key.lower()
    exe = exe.lower()
    same_name = main_key == exe
    root_key = exe if same_name else f'{main_key}+{exe}'
    suffixes = keys or ['']
    keys = [f'{root_key}{suffix}' for suffix in suffixes]
    if root_key not in keys:
        use_compat = False
    else:
        if not same_name:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
3666
3667
class ISO639Utils:
    """Conversion between two-letter (ISO 639-1) and three-letter (ISO 639-2/T) language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up.
        Returns None for an unknown code.
        """
        two_letter_code = code[:2]
        return cls._lang_map.get(two_letter_code)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        When several two-letter codes map to the same three-letter code,
        the one listed first in the table is returned.
        Returns None for an unknown code.
        """
        candidates = (
            short_name for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
3871
3872
class ISO3166Utils:
    """Lookup of country names by their two-letter (ISO 3166-1 alpha-2) code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter country code to the corresponding full name

        The lookup is case-insensitive. Returns None for an unknown code.
        """
        normalized_code = code.upper()
        return cls._country_map.get(normalized_code)
4131
4132
4133 class GeoUtils:
4134     # Major IPv4 address blocks per country
4135     _country_ip_map = {
4136         'AD': '46.172.224.0/19',
4137         'AE': '94.200.0.0/13',
4138         'AF': '149.54.0.0/17',
4139         'AG': '209.59.64.0/18',
4140         'AI': '204.14.248.0/21',
4141         'AL': '46.99.0.0/16',
4142         'AM': '46.70.0.0/15',
4143         'AO': '105.168.0.0/13',
4144         'AP': '182.50.184.0/21',
4145         'AQ': '23.154.160.0/24',
4146         'AR': '181.0.0.0/12',
4147         'AS': '202.70.112.0/20',
4148         'AT': '77.116.0.0/14',
4149         'AU': '1.128.0.0/11',
4150         'AW': '181.41.0.0/18',
4151         'AX': '185.217.4.0/22',
4152         'AZ': '5.197.0.0/16',
4153         'BA': '31.176.128.0/17',
4154         'BB': '65.48.128.0/17',
4155         'BD': '114.130.0.0/16',
4156         'BE': '57.0.0.0/8',
4157         'BF': '102.178.0.0/15',
4158         'BG': '95.42.0.0/15',
4159         'BH': '37.131.0.0/17',
4160         'BI': '154.117.192.0/18',
4161         'BJ': '137.255.0.0/16',
4162         'BL': '185.212.72.0/23',
4163         'BM': '196.12.64.0/18',
4164         'BN': '156.31.0.0/16',
4165         'BO': '161.56.0.0/16',
4166         'BQ': '161.0.80.0/20',
4167         'BR': '191.128.0.0/12',
4168         'BS': '24.51.64.0/18',
4169         'BT': '119.2.96.0/19',
4170         'BW': '168.167.0.0/16',
4171         'BY': '178.120.0.0/13',
4172         'BZ': '179.42.192.0/18',
4173         'CA': '99.224.0.0/11',
4174         'CD': '41.243.0.0/16',
4175         'CF': '197.242.176.0/21',
4176         'CG': '160.113.0.0/16',
4177         'CH': '85.0.0.0/13',
4178         'CI': '102.136.0.0/14',
4179         'CK': '202.65.32.0/19',
4180         'CL': '152.172.0.0/14',
4181         'CM': '102.244.0.0/14',
4182         'CN': '36.128.0.0/10',
4183         'CO': '181.240.0.0/12',
4184         'CR': '201.192.0.0/12',
4185         'CU': '152.206.0.0/15',
4186         'CV': '165.90.96.0/19',
4187         'CW': '190.88.128.0/17',
4188         'CY': '31.153.0.0/16',
4189         'CZ': '88.100.0.0/14',
4190         'DE': '53.0.0.0/8',
4191         'DJ': '197.241.0.0/17',
4192         'DK': '87.48.0.0/12',
4193         'DM': '192.243.48.0/20',
4194         'DO': '152.166.0.0/15',
4195         'DZ': '41.96.0.0/12',
4196         'EC': '186.68.0.0/15',
4197         'EE': '90.190.0.0/15',
4198         'EG': '156.160.0.0/11',
4199         'ER': '196.200.96.0/20',
4200         'ES': '88.0.0.0/11',
4201         'ET': '196.188.0.0/14',
4202         'EU': '2.16.0.0/13',
4203         'FI': '91.152.0.0/13',
4204         'FJ': '144.120.0.0/16',
4205         'FK': '80.73.208.0/21',
4206         'FM': '119.252.112.0/20',
4207         'FO': '88.85.32.0/19',
4208         'FR': '90.0.0.0/9',
4209         'GA': '41.158.0.0/15',
4210         'GB': '25.0.0.0/8',
4211         'GD': '74.122.88.0/21',
4212         'GE': '31.146.0.0/16',
4213         'GF': '161.22.64.0/18',
4214         'GG': '62.68.160.0/19',
4215         'GH': '154.160.0.0/12',
4216         'GI': '95.164.0.0/16',
4217         'GL': '88.83.0.0/19',
4218         'GM': '160.182.0.0/15',
4219         'GN': '197.149.192.0/18',
4220         'GP': '104.250.0.0/19',
4221         'GQ': '105.235.224.0/20',
4222         'GR': '94.64.0.0/13',
4223         'GT': '168.234.0.0/16',
4224         'GU': '168.123.0.0/16',
4225         'GW': '197.214.80.0/20',
4226         'GY': '181.41.64.0/18',
4227         'HK': '113.252.0.0/14',
4228         'HN': '181.210.0.0/16',
4229         'HR': '93.136.0.0/13',
4230         'HT': '148.102.128.0/17',
4231         'HU': '84.0.0.0/14',
4232         'ID': '39.192.0.0/10',
4233         'IE': '87.32.0.0/12',
4234         'IL': '79.176.0.0/13',
4235         'IM': '5.62.80.0/20',
4236         'IN': '117.192.0.0/10',
4237         'IO': '203.83.48.0/21',
4238         'IQ': '37.236.0.0/14',
4239         'IR': '2.176.0.0/12',
4240         'IS': '82.221.0.0/16',
4241         'IT': '79.0.0.0/10',
4242         'JE': '87.244.64.0/18',
4243         'JM': '72.27.0.0/17',
4244         'JO': '176.29.0.0/16',
4245         'JP': '133.0.0.0/8',
4246         'KE': '105.48.0.0/12',
4247         'KG': '158.181.128.0/17',
4248         'KH': '36.37.128.0/17',
4249         'KI': '103.25.140.0/22',
4250         'KM': '197.255.224.0/20',
4251         'KN': '198.167.192.0/19',
4252         'KP': '175.45.176.0/22',
4253         'KR': '175.192.0.0/10',
4254         'KW': '37.36.0.0/14',
4255         'KY': '64.96.0.0/15',
4256         'KZ': '2.72.0.0/13',
4257         'LA': '115.84.64.0/18',
4258         'LB': '178.135.0.0/16',
4259         'LC': '24.92.144.0/20',
4260         'LI': '82.117.0.0/19',
4261         'LK': '112.134.0.0/15',
4262         'LR': '102.183.0.0/16',
4263         'LS': '129.232.0.0/17',
4264         'LT': '78.56.0.0/13',
4265         'LU': '188.42.0.0/16',
4266         'LV': '46.109.0.0/16',
4267         'LY': '41.252.0.0/14',
4268         'MA': '105.128.0.0/11',
4269         'MC': '88.209.64.0/18',
4270         'MD': '37.246.0.0/16',
4271         'ME': '178.175.0.0/17',
4272         'MF': '74.112.232.0/21',
4273         'MG': '154.126.0.0/17',
4274         'MH': '117.103.88.0/21',
4275         'MK': '77.28.0.0/15',
4276         'ML': '154.118.128.0/18',
4277         'MM': '37.111.0.0/17',
4278         'MN': '49.0.128.0/17',
4279         'MO': '60.246.0.0/16',
4280         'MP': '202.88.64.0/20',
4281         'MQ': '109.203.224.0/19',
4282         'MR': '41.188.64.0/18',
4283         'MS': '208.90.112.0/22',
4284         'MT': '46.11.0.0/16',
4285         'MU': '105.16.0.0/12',
4286         'MV': '27.114.128.0/18',
4287         'MW': '102.70.0.0/15',
4288         'MX': '187.192.0.0/11',
4289         'MY': '175.136.0.0/13',
4290         'MZ': '197.218.0.0/15',
4291         'NA': '41.182.0.0/16',
4292         'NC': '101.101.0.0/18',
4293         'NE': '197.214.0.0/18',
4294         'NF': '203.17.240.0/22',
4295         'NG': '105.112.0.0/12',
4296         'NI': '186.76.0.0/15',
4297         'NL': '145.96.0.0/11',
4298         'NO': '84.208.0.0/13',
4299         'NP': '36.252.0.0/15',
4300         'NR': '203.98.224.0/19',
4301         'NU': '49.156.48.0/22',
4302         'NZ': '49.224.0.0/14',
4303         'OM': '5.36.0.0/15',
4304         'PA': '186.72.0.0/15',
4305         'PE': '186.160.0.0/14',
4306         'PF': '123.50.64.0/18',
4307         'PG': '124.240.192.0/19',
4308         'PH': '49.144.0.0/13',
4309         'PK': '39.32.0.0/11',
4310         'PL': '83.0.0.0/11',
4311         'PM': '70.36.0.0/20',
4312         'PR': '66.50.0.0/16',
4313         'PS': '188.161.0.0/16',
4314         'PT': '85.240.0.0/13',
4315         'PW': '202.124.224.0/20',
4316         'PY': '181.120.0.0/14',
4317         'QA': '37.210.0.0/15',
4318         'RE': '102.35.0.0/16',
4319         'RO': '79.112.0.0/13',
4320         'RS': '93.86.0.0/15',
4321         'RU': '5.136.0.0/13',
4322         'RW': '41.186.0.0/16',
4323         'SA': '188.48.0.0/13',
4324         'SB': '202.1.160.0/19',
4325         'SC': '154.192.0.0/11',
4326         'SD': '102.120.0.0/13',
4327         'SE': '78.64.0.0/12',
4328         'SG': '8.128.0.0/10',
4329         'SI': '188.196.0.0/14',
4330         'SK': '78.98.0.0/15',
4331         'SL': '102.143.0.0/17',
4332         'SM': '89.186.32.0/19',
4333         'SN': '41.82.0.0/15',
4334         'SO': '154.115.192.0/18',
4335         'SR': '186.179.128.0/17',
4336         'SS': '105.235.208.0/21',
4337         'ST': '197.159.160.0/19',
4338         'SV': '168.243.0.0/16',
4339         'SX': '190.102.0.0/20',
4340         'SY': '5.0.0.0/16',
4341         'SZ': '41.84.224.0/19',
4342         'TC': '65.255.48.0/20',
4343         'TD': '154.68.128.0/19',
4344         'TG': '196.168.0.0/14',
4345         'TH': '171.96.0.0/13',
4346         'TJ': '85.9.128.0/18',
4347         'TK': '27.96.24.0/21',
4348         'TL': '180.189.160.0/20',
4349         'TM': '95.85.96.0/19',
4350         'TN': '197.0.0.0/11',
4351         'TO': '175.176.144.0/21',
4352         'TR': '78.160.0.0/11',
4353         'TT': '186.44.0.0/15',
4354         'TV': '202.2.96.0/19',
4355         'TW': '120.96.0.0/11',
4356         'TZ': '156.156.0.0/14',
4357         'UA': '37.52.0.0/14',
4358         'UG': '102.80.0.0/13',
4359         'US': '6.0.0.0/8',
4360         'UY': '167.56.0.0/13',
4361         'UZ': '84.54.64.0/18',
4362         'VA': '212.77.0.0/19',
4363         'VC': '207.191.240.0/21',
4364         'VE': '186.88.0.0/13',
4365         'VG': '66.81.192.0/20',
4366         'VI': '146.226.0.0/16',
4367         'VN': '14.160.0.0/11',
4368         'VU': '202.80.32.0/20',
4369         'WF': '117.20.32.0/21',
4370         'WS': '202.4.32.0/19',
4371         'YE': '134.35.0.0/16',
4372         'YT': '41.242.116.0/22',
4373         'ZA': '41.0.0.0/11',
4374         'ZM': '102.144.0.0/13',
4375         'ZW': '102.177.192.0/18',
4376     }
4377
4378     @classmethod
4379     def random_ipv4(cls, code_or_block):
4380         if len(code_or_block) == 2:
4381             block = cls._country_ip_map.get(code_or_block.upper())
4382             if not block:
4383                 return None
4384         else:
4385             block = code_or_block
4386         addr, preflen = block.split('/')
4387         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4388         addr_max = addr_min | (0xffffffff >> int(preflen))
4389         return compat_str(socket.inet_ntoa(
4390             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4391
4392
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets a single request pick its own proxy.

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to use
    for that request; the header is deleted from the request once read.
    """

    def __init__(self, proxies=None):
        # Default handlers: they go through proxy_open() with the
        # '__noproxy__' placeholder as the proxy
        for scheme in ('http', 'https'):
            opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, f'{scheme}_open', opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Pick the proxy for req; returning None means no HTTP proxying is set up here."""
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only record the proxy here: yt-dlp's http/https handlers wrap
            # the socket with socks themselves
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(self, req, proxy, type)
4416
4417
4418 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4419 # released into Public Domain
4420 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4421
def long_to_bytes(n, blocksize=0):
    """Convert an integer to a big-endian byte string.

    @param n          Integer to convert (passed through int()). Zero and
                      negative values both give a single zero byte
    @param blocksize  If greater than zero, zero bytes are added at the front
                      so that the length is a multiple of blocksize
    @returns          bytes with no leading zero bytes other than the padding
    """
    n = int(n)
    if n > 0:
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        # Kept from the PyCrypto-derived implementation: anything that is not
        # positive is represented by a lone zero byte
        s = b'\000'
    remainder = len(s) % blocksize if blocksize > 0 else 0
    if remainder:
        s = (blocksize - remainder) * b'\000' + s
    return s
4450
4451
def bytes_to_long(s):
    """Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes(); leading zero bytes
    do not change the result and an empty input gives 0.

    @param s  bytes-like object
    @returns  non-negative int
    """
    return int.from_bytes(s, 'big')
4467
4468
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt one block with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: bytes-like object to encrypt; its bytes are read as a
              little-endian integer
        exponent, modulus: integers e and N of the RSA key
    Output: hex string of the encrypted data

    Limitation: supports one block encryption only
    '''

    reversed_hex = binascii.hexlify(data[::-1])
    plain_int = int(reversed_hex, 16)
    cipher_int = pow(plain_int, exponent, modulus)
    return f'{cipher_int:x}'
4484
4485
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as 0x00 0x02 <padding> 0x00 <data>.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be non-zero: the first zero octet after the header
    # is what separates the padding from the data
    padding = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + padding + [0] + list(data)
4499
4500
def encode_base_n(num, n, table=None):
    """Convert an integer to its base-n string representation.

    @param num    Integer to encode. A negative value is encoded as its
                  absolute value prefixed with '-'
    @param n      Base to encode in
    @param table  Digit characters indexed by digit value. Defaults to the
                  first n characters of 0-9, a-z, A-Z
    @returns      The encoded string
    @raises ValueError  if n exceeds the table length, or if n is below 2
                        and num is not zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]
    if n < 2:
        # A base below 2 cannot represent a non-zero value (repeatedly
        # dividing by 1 would never terminate)
        raise ValueError('base %d is too small' % n)
    if num < 0:
        # Floor division of a negative number never reaches zero, so encode
        # the magnitude and keep the sign separately
        return '-' + encode_base_n(-num, n, table)

    digits = []
    while num:
        num, digit = divmod(num, n)
        digits.append(table[digit])
    return ''.join(reversed(digits))
4517
4518
def decode_packed_codes(code):
    """Expand the packed script that PACKED_CODES_RE finds inside code.

    Every word of the obfuscated payload is replaced by the symbol stored
    under that word in a table keyed by base-n encoded indices; an empty
    symbol stands for the key itself.
    """
    payload, radix, remaining, words = re.search(PACKED_CODES_RE, code).groups()
    radix = int(radix)
    remaining = int(remaining)
    words = words.split('|')

    lookup = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        lookup[token] = words[remaining] or token

    def _expand(match):
        return lookup[match.group(0)]

    return re.sub(r'\b(\w+)\b', _expand, payload)
4535
4536
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions.

    The shift wraps around the alphabet; characters that are not part of the
    alphabet are copied unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
4544
4545
def rot47(s):
    """Rotate the 94 ASCII characters from '!' to '~' in s by 47 places."""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
4548
4549
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list (KEY=value,KEY="value",...) into a dict.

    Surrounding double quotes are removed from quoted values. If a key
    appears more than once, its last value is kept.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: raw[1:-1] if raw.startswith('"') else raw
        for name, raw in pairs
    }
4557
4558
def urshift(val, n):
    """Unsigned right shift: a negative val is first offset by 2**32, then shifted by n bits."""
    if val < 0:
        val += 0x100000000
    return val >> n
4561
4562
4563 # Based on png2str() written by @gdkchan and improved by @yokrysty
4564 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG image bytes into rows of raw sample values.

    @param png_data  bytes of a complete PNG file
    @returns         (width, height, pixels) where pixels is a list with one
                     list per image row, each holding width * 3 integers
    @raises OSError  if the PNG signature or the leading IHDR chunk is
                     missing, or if no IDAT data is found

    Only the width and height are read from IHDR; bit depth, colour type and
    interlacing are not inspected and chunk CRCs are not verified.
    NOTE(review): the fixed stride of width * 3 presumably means only 8-bit
    RGB, non-interlaced images decode correctly - confirm against callers.
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature, i.e. the chunk stream
    header = png_data[8:]

    # The first chunk (4-byte length, then 4-byte type) must be IHDR
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise OSError('Not a valid PNG file.')

    # Big-endian unsigned formats keyed by the byte length of the value
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: length (4 bytes), type (4 bytes), data (length bytes), CRC (4 bytes)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The check above guarantees that the first chunk is IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # The compressed image stream is the concatenation of all IDAT chunks
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise OSError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of data bytes per row (3 bytes per pixel), excluding the filter byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Map a flat index over the already reconstructed rows to its value
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every row in the decompressed stream starts with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so that _get_pixel can read the
        # values of this row that were already reconstructed
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours used by the filters: the value 3 bytes to the left
            # and the value at the same position in the previous row
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Any other filter type (including 0) leaves the value unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use the neighbour closest to p, preferring a, then b, then c
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
4668
4669
def write_xattr(path, key, value):
    """Set the extended attribute key of the file at path to value.

    The Python `xattr` module is tried first (either pyxattr or xattr,
    told apart by their API). If it cannot be imported, the value is written
    to an NTFS Alternate Data Stream on Windows, or set with the `setfattr`
    or `xattr` command line tools elsewhere.

    @param path   Path of the file to modify
    @param key    Name of the attribute
    @param value  Attribute value; the fallbacks treat it as UTF-8 encoded
                  bytes (it is written to a binary file or decoded)
    @raises XAttrMetadataError     if setting the attribute fails
    @raises XAttrUnavailableError  if pyxattr is too old or no tool is found
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the message below, nothing falls back
                # here - only ImportError is handled by this try, so this
                # presumably propagates to the caller unless
                # XAttrUnavailableError derives from ImportError; verify
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module available: use a platform specific fallback
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # A stream is addressed as '<file path>:<stream name>'
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except OSError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command line argument, so it has
                # to be text
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are available
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except OSError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                # The tool's stderr output becomes the error message
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
4752
4753
def random_birthday(year_field, month_field, day_field):
    """Pick a random date from 1950-01-01 to 1995-12-31 (both included).

    Returns a dict that maps the three given field names to the year, month
    and day of that date, each as a decimal string without zero padding.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return {field: str(part) for field, part in zip(field_names, date_parts)}
4764
4765
# Templates for internet shortcut files, which are plain text files.
# The %(...)s placeholders in each template name the mapping keys that have
# to be supplied when the template is filled in.

# INI-style file with an [InternetShortcut] section; needs 'url'.
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# Apple property list (XML) with a single URL entry; needs 'url'.
# NOTE(review): the URL is placed inside XML text as-is, so callers
# presumably have to XML-escape it first - verify.
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# [Desktop Entry] file of Type=Link; needs 'filename' and 'url'.
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Lookup of the templates above by a short name (presumably the file
# extension of the shortcut - verify against callers).
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
4797
4798
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    A hostname, if there is one, is converted with the 'idna' codec; IRIs without a host (e.g. relative references) are passed through with only their other components escaped. An explicit port is kept, except for port 80 on `http`, where it is the implied default.

    Raises ValueError for hosts written in brackets (IPv6), which are not supported.
    """

    iri_parts = urllib.parse.urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
    userinfo_safe = r"!$%&'()*+,~"

    net_location = ''
    if iri_parts.username:
        net_location += urllib.parse.quote(iri_parts.username, safe=userinfo_safe)
        if iri_parts.password is not None:
            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=userinfo_safe)
        net_location += '@'

    # `.hostname` is None when the IRI has no host part
    if iri_parts.hostname:
        # Punycode for Unicode hostnames. The 'idna' encoding produces ASCII text.
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')

    # Port 80 is only redundant for http; for any other scheme, dropping it
    # would make the URI point at a different port
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return urllib.parse.urlunparse(
        (iri_parts.scheme,
            net_location,

            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
4841
4842
def to_high_limit_path(path):
    """Return path in a form that avoids the MAX_PATH limit on Windows; elsewhere return it unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    absolute = os.path.abspath(path)
    return '\\\\?\\' + absolute
4849
4850
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Look up field in obj with traverse_obj and format it with template.

    @param field     Path passed (via variadic) to traverse_obj
    @param template  %-format string applied to the value
    @param ignore    Values for which default is returned instead
    @param func      Optional callable applied to the value before formatting
    """
    value = traverse_obj(obj, *variadic(field))
    if value in ignore:
        return default
    if func:
        value = func(value)
    return template % value
4856
4857
def clean_podcast_url(url):
    """Remove the known podcast tracking/redirect prefixes from url."""
    tracker_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracker_prefix_re, '', url)
4873
4874
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID as a lowercase, hyphenated string.

    In the template, '4' is the fixed version digit, every 'x' becomes a
    random hex digit and 'y' becomes one of 8, 9, a or b, which encodes the
    variant bits (10xx) required by RFC 4122.
    """
    def _random_digit(mobj):
        return random.choice('89ab' if mobj.group(0) == 'y' else _HEX_TABLE)

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
4880
4881
def make_dir(path, to_screen=None):
    """Create the directory that is to contain path, if it does not exist.

    @param path       File path whose directory component should exist
    @param to_screen  Optional callable that is given an error message when
                      the directory cannot be created
    @returns          True if the directory exists afterwards or path has no
                      directory component, False if creating it raised OSError
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            # exist_ok: the directory may get created by someone else
            # between the check above and this call
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # Report only if a reporter was actually given
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
4892
4893
def get_executable_path():
    """Return the absolute path of the directory the program is run from.

    This is the directory of sys.executable when sys.frozen is set, the
    directory two levels above this file when the module was loaded by a
    zipimporter, and the directory one level above this file otherwise.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))
    # Running from ZIP needs one more level up than running from source
    levels_up = '../..' if isinstance(__loader__, zipimporter) else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
4903
4904
def load_plugins(name, suffix, namespace):
    """Load plugin objects from ytdlp_plugins/<name>/__init__.py.

    The file is looked up below get_executable_path() and executed as module
    `name`. Every attribute of that module whose name ends with suffix and
    is not yet a key of namespace is stored in namespace.

    @returns  dict of the newly added names to their objects; empty if the
              plugin file does not exist
    """
    found = {}
    with contextlib.suppress(FileNotFoundError):
        init_file = os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')
        spec = importlib.util.spec_from_file_location(name, init_file)
        module = importlib.util.module_from_spec(spec)
        sys.modules[spec.name] = module
        spec.loader.exec_module(module)
        for attr in dir(module):
            if attr in namespace or not attr.endswith(suffix):
                continue
            found[attr] = namespace[attr] = getattr(module, attr)
    return found
4921
4922
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a:
                              - None:     Do nothing
                              - string:   A dictionary key
                              - int:      An index into a list
                              - tuple:    A list of keys all of which will be traversed
                              - Ellipsis: Fetch all values in the object
                              - Function: Takes the key and value as arguments
                                          and returns whether the key matches or not
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The result of the first path that yields a value other
                            than None (and, for branching paths, a non-empty list),
                            otherwise default
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case the string keys of every path; dictionary keys are
        # lower-cased when compared against them further below
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # depth (set per path in the loop at the bottom) records the deepest
        # level of branching reached, i.e. how deeply the result is nested
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            # A None key ends the traversal at the current object;
            # a None object cannot be traversed any further
            if None in (key, obj):
                return obj
            if isinstance(key, (list, tuple)):
                # Traverse every sub-key, then branch over the results
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch: apply the rest of the path to every value
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Branch over the (key, value) pairs accepted by the function
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Dictionary lookup; without casesense an exact match is
                # tried first, then the first key whose lower-cased form matches
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # User supplied keys are strings: 'a:b:c' becomes a
                    # slice, anything else is parsed as an integer
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    # A bare ':' selects everything, like Ellipsis
                    if key == slice(None):
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Build the filter that is applied to the final value(s)
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # The path branched: flatten the nested lists down to one
                # level, then keep the values that pass the type test
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
5021
5022
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated: writes a deprecation notice and delegates to traverse_obj."""
    deprecation_notice = (
        'DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
        'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
    write_string(deprecation_notice)
    return traverse_obj(
        dictn, keys,
        casesense=casesense, is_user_input=True, traverse_string=True)
5027
5028
def get_first(obj, keys, **kwargs):
    """Return the first traverse_obj result for keys over all values of obj.

    The path is prefixed with Ellipsis and get_all is forced to False; the
    remaining keyword arguments are passed on to traverse_obj.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, get_all=False, **kwargs)
5031
5032
def variadic(x, allowed_types=(str, bytes, dict)):
    """Return x itself if it is an iterable, otherwise wrap it in a 1-tuple.

    Instances of allowed_types (str, bytes and dict by default) count as
    single values and are wrapped as well.
    """
    if not isinstance(x, collections.abc.Iterable) or isinstance(x, allowed_types):
        return (x,)
    return x
5035
5036
def decode_base(value, digits):
    """Convert the string value, written with the given digit characters, to an integer.

    The base is len(digits) and a character's position in digits is its
    digit value.
    """
    digit_values = {symbol: position for position, symbol in enumerate(digits)}
    radix = len(digits)
    return functools.reduce(
        lambda total, symbol: total * radix + digit_values[symbol], value, 0)
5046
5047
def time_seconds(**kwargs):
    """Return the current Unix timestamp shifted by a UTC offset.

    @param kwargs  datetime.timedelta keyword arguments (e.g. hours=9)
                   giving the offset to add; no arguments means no offset
    @returns       float, seconds since the epoch plus the offset
    """
    # The timestamp of an aware datetime does not depend on its timezone, so
    # datetime.now(tz).timestamp() would ignore the offset; add it explicitly
    return time.time() + datetime.timedelta(**kwargs).total_seconds()
5051
5052
5053 # create a JSON Web Signature (jws) with HS256 algorithm
5054 # the resulting format is in JWS Compact Serialization
5055 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5056 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create a JSON Web Token signed with HMAC-SHA256.

    @param payload_data  JSON-serializable payload (claims)
    @param key           Signing secret as str; it is UTF-8 encoded
    @param headers       Optional dict of additional header fields, merged
                         over the default 'alg' and 'typ' fields
    @returns             The token as bytes: header.payload.signature
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    # NOTE(review): the segments use standard base64 with '=' padding, while
    # JWS Compact Serialization (RFC 7515) specifies unpadded base64url.
    # Left unchanged because it alters the tokens sent to servers - verify
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    signing_input = header_b64 + b'.' + payload_b64
    h = hmac.new(key.encode('utf-8'), signing_input, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return signing_input + b'.' + signature_b64
5070
5071
5072 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Return the decoded payload (claims) of a JWT in compact form.

    The signature is NOT verified and the header is not inspected.

    @param jwt  Token as str, made of exactly three '.'-separated parts
    @returns    The JSON-decoded payload
    """
    _, payload_b64, _ = jwt.split('.')
    # JWT segments normally come without the trailing '=' padding, which
    # urlsafe_b64decode requires; restore it
    payload_b64 += '=' * (-len(payload_b64) % 4)
    return json.loads(base64.urlsafe_b64decode(payload_b64))
5077
5078
def supports_terminal_sequences(stream):
    """Tell whether terminal escape sequences may be written to stream.

    On Windows this needs WINDOWS_VT_MODE to be set and a Windows version of
    at least 10.0.10586; elsewhere the TERM environment variable must be
    non-empty. In addition, stream.isatty() must succeed and return true.
    """
    if compat_os_name != 'nt':
        if not os.getenv('TERM'):
            return False
    else:
        from .compat import WINDOWS_VT_MODE  # Must be imported locally
        if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
            return False
    try:
        return stream.isatty()
    except BaseException:
        return False
5090
5091
# Matches ESC '[' followed by one or more non-'m' characters and a final 'm'
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return string with every match of _terminal_sequences_re removed."""
    return re.sub(_terminal_sequences_re, '', string)
5097
5098
def number_of_digits(number):
    """Return the length of number formatted with '%d' (a minus sign is counted)."""
    formatted = '%d' % number
    return len(formatted)
5101
5102
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the string forms of all truthy values with delim.

    If from_dict is given, the values are treated as keys and looked up in
    it (with .get) before the falsy ones are dropped.
    """
    if from_dict is not None:
        values = (from_dict.get(key) for key in values)
    return delim.join(str(value) for value in values if value)
5107
5108
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest (width, height) among the formats and, for each thumbnail:
    * Modify the URL: Replace whatever url_width_re matches with that width
    * Update dimensions: width and height become those of that format

    The thumbnails are returned unchanged if no format has a width.

    This function is useful with video services that scale the provided thumbnails on demand
    """
    dimension_keys = ('width', 'height')
    # Tuples compare element by element, so the width is decisive and the
    # height only breaks ties
    best = max(
        (tuple(fmt.get(key) or 0 for key in dimension_keys) for fmt in formats),
        default=(0, 0))
    best_width = best[0]
    if not best_width:
        return thumbnails

    def _rescale(thumb):
        return merge_dicts(
            {'url': re.sub(url_width_re, str(best_width), thumb['url'])},
            dict(zip(dimension_keys, best)), thumb)

    return [_rescale(thumb) for thumb in thumbnails]
5129
5130
def parse_http_range(range):
    """
    Parse value of "Range" or "Content-Range" HTTP header into tuple.

    Returns (start, end, total). `end` and `total` go through int_or_none()
    since the header may omit them. An empty value, or one without a
    "bytes=<start>-" / "bytes <start>-" part, gives (None, None, None).
    """
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range) if range else None
    if crg is None:
        return None, None, None
    start, end, total = crg.groups()
    return int(start), int_or_none(end), int_or_none(total)
5139
5140
class Config:
    """
    A list of command-line arguments together with the config files they pull in.

    An instance keeps the arguments it was initialized with in `own_args` and,
    in `configs`, one child Config per file named by the "config_locations"
    option of those arguments. Children made by append_config() share the
    parent's set of loaded paths, so a file that was already loaded is skipped.
    """
    own_args = None
    filename = None
    __initialized = False
    # Sentinel for read_file(): tells "no default given" apart from an explicit default=None
    _NO_DEFAULT = object()

    def __init__(self, parser, label=None):
        self._parser, self.label = parser, label
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """
        Set the arguments of this config and load all config files they refer to.

        `filename` is the file that `args` came from, if any. Config locations
        given in `args` are joined onto the directory of that file.
        Returns False, without changing anything, if `filename` was already
        loaded; True otherwise. May only be called once per instance.
        """
        assert not self.__initialized
        directory = ''
        if filename:
            location = os.path.realpath(filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        self.own_args, self.filename = args, filename
        for location in self._parser.parse_args(args)[0].config_locations or []:
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self._parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        """Describe own arguments (credentials scrubbed), then each nested config prefixed with "| " """
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=_NO_DEFAULT):
        """
        Read `filename` and split its contents into a list of arguments.

        The contents are split with shlex.split(), with "#" comments enabled.
        If the file cannot be opened, `default` is returned instead; when no
        default is given, that is a new empty list on every call, so callers
        may freely modify the result.
        """
        try:
            optionf = open(filename)
        except OSError:
            # silently skip if file is not present
            return [] if default is Config._NO_DEFAULT else default
        with optionf:
            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
            return shlex.split(optionf.read(), comments=True)

    @staticmethod
    def hide_login_info(opts):
        """
        Return a copy of `opts` with the values of credential options replaced by "PRIVATE".

        Both forms are handled: "--option=value" and "--option value".
        """
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            m = eqre.match(o)
            return (m.group('key') + '=PRIVATE') if m else o

        opts = list(map(_scrub_eq, opts))
        for idx, opt in enumerate(opts):
            # The value of a private option is the argument right after it
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """Create a child config from `args` (as accepted by init) and keep it unless its file was already loaded"""
        config = type(self)(self._parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        """Iterate over the arguments of the nested configs (last added first), followed by own_args"""
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.own_args or []

    def parse_args(self):
        """Parse all_args with the parser given at construction and return its result"""
        # Pass a real list: all_args is a generator, which a parser that
        # slices or indexes its arguments (e.g. plain optparse) cannot handle
        return self._parser.parse_args(list(self.all_args))
5226
5227
class WebSocketsWrapper():
    """
    Wraps websockets module to use in non-async scopes

    Every instance owns a private event loop on which all coroutines of the
    connection are run to completion, one call at a time.
    Can be used as a context manager; __exit__ is also registered with atexit.
    """
    pool = None

    def __init__(self, url, headers=None, connect=True):
        self.loop = asyncio.events.new_event_loop()
        self.conn = compat_websockets.connect(
            url, extra_headers=headers, ping_interval=None,
            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
        if connect:
            self.__enter__()
        atexit.register(self.__exit__, None, None, None)

    def __enter__(self):
        # `pool` is what entering the connection yields; enter only once
        if not self.pool:
            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
        return self

    def send(self, *args):
        """Run pool.send(*args) on the private loop"""
        self.run_with_loop(self.pool.send(*args), self.loop)

    def recv(self, *args):
        """Run pool.recv(*args) on the private loop and return its result"""
        return self.run_with_loop(self.pool.recv(*args), self.loop)

    def __exit__(self, type, value, traceback):
        # This is also registered with atexit, so it can be called again
        # after an explicit exit. Nothing can run on a closed loop
        if self.loop.is_closed():
            return None
        try:
            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
        finally:
            # Leftover tasks must be cancelled while the loop is still usable.
            # Close the loop only afterwards, even if the cancellation fails
            try:
                self._cancel_all_tasks(self.loop)
            finally:
                self.loop.close()

    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
    # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
    @staticmethod
    def run_with_loop(main, loop):
        """
        Run the coroutine `main` on `loop` until it completes and return its result.

        Raises ValueError if `main` is not a coroutine. Afterwards, the async
        generators and (where available) the default executor of the loop are shut down.
        """
        if not asyncio.coroutines.iscoroutine(main):
            raise ValueError(f'a coroutine was expected, got {main!r}')

        try:
            return loop.run_until_complete(main)
        finally:
            loop.run_until_complete(loop.shutdown_asyncgens())
            if hasattr(loop, 'shutdown_default_executor'):
                loop.run_until_complete(loop.shutdown_default_executor())

    @staticmethod
    def _cancel_all_tasks(loop):
        """Cancel all unfinished tasks of `loop`, wait for them and report their unhandled exceptions"""
        to_cancel = asyncio.tasks.all_tasks(loop)

        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()

        # gather() lost its "loop" parameter in Python 3.10.
        # It is not needed: the tasks are already bound to `loop`
        loop.run_until_complete(
            asyncio.tasks.gather(*to_cancel, return_exceptions=True))

        for task in to_cancel:
            if task.cancelled():
                continue
            if task.exception() is not None:
                loop.call_exception_handler({
                    'message': 'unhandled exception during asyncio.run() shutdown',
                    'exception': task.exception(),
                    'task': task,
                })
5295
5296
# Whether compat_websockets (imported from .compat) is usable, i.e. truthy
has_websockets = True if compat_websockets else False
5298
5299
def merge_headers(*dicts):
    """
    Merge dicts of http headers case insensitively, prioritizing the latter ones

    Header names are normalized with str.title() in the returned dict.
    """
    merged = {}
    for headers in dicts:
        for name, value in dict.items(headers):
            merged[name.title()] = value
    return merged
5303
5304
class classproperty:
    """
    Decorator that makes a read-only property out of a function taking the class.

    The function receives the owner class, no matter whether the attribute
    is looked up on the class or on one of its instances.
    """

    def __init__(self, f):
        self.f = f

    def __get__(self, _, cls):
        getter = self.f
        return getter(cls)
5311
5312
def Namespace(**kwargs):
    """Return a namedtuple called "Namespace" with the given keywords as its fields and values"""
    field_names = list(kwargs)
    namespace_type = collections.namedtuple('Namespace', field_names)
    return namespace_type(**kwargs)