#!/usr/bin/env python3
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import urllib.error
import urllib.parse
import xml.etree.ElementTree
import zlib

from .compat import asyncio, functools  # Modules
from .compat import (
    compat_chr,
    compat_cookiejar,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_http_client,
    compat_HTTPError,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
)
from .dependencies import brotli, certifi, websockets
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise
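
# Illustrative usage (hypothetical filename):
#   write_json_file({'id': 'abc123'}, 'info.json')
# The JSON is written to a temp file in the target directory first and then
# os.rename()d over 'info.json', so readers never observe a partial file.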


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
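
# Illustrative example (namespace URL is hypothetical):
#   xpath_with_ns('media:song/media:name', {'media': 'http://example.com/ns'})
# returns '{http://example.com/ns}song/{http://example.com/ns}name'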


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_html_by_id(id, html):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(compat_HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        compat_HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
492 """
493 def find_or_raise(haystack, needle, exc):
494 try:
495 return haystack.index(needle)
496 except ValueError:
497 raise exc
498 closing_tag = f'</{tag}>'
499 whole_start = find_or_raise(
500 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
501 content_start = find_or_raise(
502 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
503 content_start += whole_start + 1
504 with HTMLBreakOnClosingTagParser() as parser:
505 parser.feed(html[whole_start:content_start])
506 if not parser.tagstack or parser.tagstack[0] != tag:
507 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
508 offset = content_start
509 while offset < len(html):
510 next_closing_tag_start = find_or_raise(
511 html[offset:], closing_tag,
512 compat_HTMLParseError(f'closing {tag} tag not found'))
513 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
514 try:
515 parser.feed(html[offset:offset + next_closing_tag_end])
516 offset += next_closing_tag_end
517 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
518 return html[content_start:offset + next_closing_tag_start], \
519 html[whole_start:offset + next_closing_tag_end]
520 raise compat_HTMLParseError('unexpected end of html')
521
522
523 class HTMLAttributeParser(compat_HTMLParser):
524 """Trivial HTML parser to gather the attributes for a single element"""
525
526 def __init__(self):
527 self.attrs = {}
528 compat_HTMLParser.__init__(self)
529
530 def handle_starttag(self, tag, attrs):
531 self.attrs = dict(attrs)
532
533
534 class HTMLListAttrsParser(compat_HTMLParser):
535 """HTML parser to gather the attributes for the elements of a list"""
536
537 def __init__(self):
538 compat_HTMLParser.__init__(self)
539 self.items = []
540 self._level = 0
541
542 def handle_starttag(self, tag, attrs):
543 if tag == 'li' and self._level == 0:
544 self.items.append(dict(attrs))
545 self._level += 1
546
547 def handle_endtag(self, tag):
548 self._level -= 1
549
550
551 def extract_attributes(html_element):
552 """Given a string for an HTML element such as
553 <el
554 a="foo" B="bar" c="&98;az" d=boz
555 empty= noval entity="&amp;"
556 sq='"' dq="'"
557 >
558 Decode and return a dictionary of attributes.
559 {
560 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
561 'empty': '', 'noval': None, 'entity': '&',
562 'sq': '"', 'dq': '\''
563 }.
564 """
565 parser = HTMLAttributeParser()
566 with contextlib.suppress(compat_HTMLParseError):
567 parser.feed(html_element)
568 parser.close()
569 return parser.attrs
570
571
def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except LockingUnsupportedError:
                stream = open(filename, open_mode)
            return (stream, filename)
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
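
# Illustrative example:
#   timeconvert('Wed, 14 Jun 2017 07:00:00 GMT')
# returns 1497423600, the POSIX timestamp for that RFC 2822 date
# (None is returned if the string cannot be parsed).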


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
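
# Illustrative examples:
#   sanitize_filename('AT&T', restricted=True)    returns 'AT_T'
#   sanitize_filename('yes? no', restricted=True) returns 'yes_no'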


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
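
# Illustrative examples (hypothetical URLs):
#   sanitize_url('//example.com/video')      returns 'http://example.com/video'
#   sanitize_url('httpss://example.com/v/1') returns 'https://example.com/v/1'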


def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
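
# Illustrative example (hypothetical credentials):
#   extract_basic_auth('http://user:pass@example.com/path')
# returns ('http://example.com/path', 'Basic dXNlcjpwYXNz'), i.e. the
# credentials are stripped from the URL and returned as an Authorization value.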


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
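
# Illustrative example: orderedSet([1, 2, 1, 3, 2]) returns [1, 2, 3],
# preserving first-seen order (O(n^2); intended for small inputs).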


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
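
# Illustrative examples:
#   unescapeHTML('&eacute;') returns 'é'  (named entity)
#   unescapeHTML('&#38;')    returns '&'  (decimal numeric entity)
#   unescapeHTML('&#x2F;')   returns '/'  (hexadecimal numeric entity)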


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)
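
# Illustrative example:
#   timetuple_from_msec(345235)
# returns Time(hours=0, minutes=5, seconds=45, milliseconds=235)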


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
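
# Illustrative examples:
#   formatSeconds(3661)            returns '1:01:01'
#   formatSeconds(61.5, msec=True) returns '1:01.500'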


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
                # Work around the issue in load_default_certs when there are bad certificates. See:
                # https://github.com/yt-dlp/yt-dlp/issues/1060,
                # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()
    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
           'filling out the appropriate issue template. '
           'Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise compat_cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}

        # A 303 must either use GET or HEAD for subsequent request
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
        if code == 303 and m != 'HEAD':
            m = 'GET'
        # 301 and 302 redirects are commonly turned into a GET from a POST
        # for subsequent requests by browsers, so we'll do the same.
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
        if code in (301, 302) and m == 'POST':
            m = 'GET'

        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True, method=m)


def extract_timezone(date_str):
    m = re.search(
        r'''(?x)
            ^.{8,}?                                          # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                        # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|               # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                         # optional space
                (?P<sign>\+|-)                               # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})   # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
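
# Illustrative example:
#   extract_timezone('2020-01-01T00:00:00+05:30')
# returns (datetime.timedelta(seconds=19800), '2020-01-01T00:00:00'),
# i.e. the +05:30 offset and the date string with the timezone stripped.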


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    with contextlib.suppress(ValueError):
        date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
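
# Illustrative example:
#   parse_iso8601('2014-03-23T23:04:26+0100') returns 1395612266
# (fractional seconds are discarded; the offset is folded into the timestamp).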


def date_formats(day_first=True):
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return compat_str(upload_date)
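
# Illustrative examples:
#   unified_strdate('December 21, 2010') returns '20101221'
#   unified_strdate('1968/12/10')        returns '19681210'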
1704
1705
1706 def unified_timestamp(date_str, day_first=True):
1707 if date_str is None:
1708 return None
1709
1710 date_str = re.sub(r'[,|]', '', date_str)
1711
1712 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1713 timezone, date_str = extract_timezone(date_str)
1714
1715 # Remove AM/PM + timezone
1716 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1717
1718 # Remove unrecognized timezones from ISO 8601-like timestamps
1719 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1720 if m:
1721 date_str = date_str[:-len(m.group('tz'))]
1722
1723 # Python only supports microseconds, so remove nanoseconds
1724 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1725 if m:
1726 date_str = m.group(1)
1727
1728 for expression in date_formats(day_first):
1729 with contextlib.suppress(ValueError):
1730 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1731 return calendar.timegm(dt.timetuple())
1732 timetuple = email.utils.parsedate_tz(date_str)
1733 if timetuple:
1734 return calendar.timegm(timetuple) + pm_delta * 3600
1735
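# Example (illustrative): same input as above, but returned as an epoch timestamp:
#   >>> unified_timestamp('Dec 14, 2012')
#   1355443200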
1736
1737 def determine_ext(url, default_ext='unknown_video'):
1738 if url is None or '.' not in url:
1739 return default_ext
1740 guess = url.partition('?')[0].rpartition('.')[2]
1741 if re.match(r'^[A-Za-z0-9]+$', guess):
1742 return guess
1743 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
1744 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1745 return guess.rstrip('/')
1746 else:
1747 return default_ext
1748
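# Example (illustrative, assuming 'mp4' is in KNOWN_EXTENSIONS):
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'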
1749
1750 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1751 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1752
1753
1754 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1755 R"""
1756 Return a datetime object from a string.
1757 Supported format:
1758 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1759
1760 @param format strftime format of DATE
1761 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1762 auto: round to the unit provided in date_str (if applicable).
1763 """
1764 auto_precision = False
1765 if precision == 'auto':
1766 auto_precision = True
1767 precision = 'microsecond'
1768 today = datetime_round(datetime.datetime.utcnow(), precision)
1769 if date_str in ('now', 'today'):
1770 return today
1771 if date_str == 'yesterday':
1772 return today - datetime.timedelta(days=1)
1773 match = re.match(
1774 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1775 date_str)
1776 if match is not None:
1777 start_time = datetime_from_str(match.group('start'), precision, format)
1778 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1779 unit = match.group('unit')
1780 if unit == 'month' or unit == 'year':
1781 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1782 unit = 'day'
1783 else:
1784 if unit == 'week':
1785 unit = 'day'
1786 time *= 7
1787 delta = datetime.timedelta(**{unit + 's': time})
1788 new_date = start_time + delta
1789 if auto_precision:
1790 return datetime_round(new_date, unit)
1791 return new_date
1792
1793 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1794
1795
1796 def date_from_str(date_str, format='%Y%m%d', strict=False):
1797 R"""
1798 Return a date object from a string using datetime_from_str
1799
1800 @param strict Restrict allowed patterns to "YYYYMMDD" and
1801 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1802 """
1803 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1804 raise ValueError(f'Invalid date format "{date_str}"')
1805 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1806
1807
1808 def datetime_add_months(dt, months):
1809 """Increment/Decrement a datetime object by months."""
1810 month = dt.month + months - 1
1811 year = dt.year + month // 12
1812 month = month % 12 + 1
1813 day = min(dt.day, calendar.monthrange(year, month)[1])
1814 return dt.replace(year, month, day)
1815
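# Example (illustrative): the day is clamped when the target month is shorter:
#   >>> datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   datetime.datetime(2020, 2, 29, 0, 0)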
1816
1817 def datetime_round(dt, precision='day'):
1818 """
1819 Round a datetime object's time to a specific precision
1820 """
1821 if precision == 'microsecond':
1822 return dt
1823
1824 unit_seconds = {
1825 'day': 86400,
1826 'hour': 3600,
1827 'minute': 60,
1828 'second': 1,
1829 }
1830 roundto = lambda x, n: ((x + n / 2) // n) * n
1831 timestamp = calendar.timegm(dt.timetuple())
1832 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1833
1834
1835 def hyphenate_date(date_str):
1836 """
1837 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1838 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1839 if match is not None:
1840 return '-'.join(match.groups())
1841 else:
1842 return date_str
1843
1844
1845 class DateRange:
1846 """Represents a time interval between two dates"""
1847
1848 def __init__(self, start=None, end=None):
1849 """start and end must be strings in the format accepted by date"""
1850 if start is not None:
1851 self.start = date_from_str(start, strict=True)
1852 else:
1853 self.start = datetime.datetime.min.date()
1854 if end is not None:
1855 self.end = date_from_str(end, strict=True)
1856 else:
1857 self.end = datetime.datetime.max.date()
1858 if self.start > self.end:
1859 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
1860
1861 @classmethod
1862 def day(cls, day):
1863 """Returns a range that only contains the given day"""
1864 return cls(day, day)
1865
1866 def __contains__(self, date):
1867 """Check if the date is in the range"""
1868 if not isinstance(date, datetime.date):
1869 date = date_from_str(date)
1870 return self.start <= date <= self.end
1871
1872 def __str__(self):
1873 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1874
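# Example (illustrative): membership accepts both date objects and date strings:
#   >>> '20200115' in DateRange('20200101', '20200131')
#   True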
1875
1876 def platform_name():
1877 """ Returns the platform name as a compat_str """
1878 res = platform.platform()
1879 if isinstance(res, bytes):
1880 res = res.decode(preferredencoding())
1881
1882 assert isinstance(res, compat_str)
1883 return res
1884
1885
1886 @functools.cache
1887 def get_windows_version():
1888 ''' Get Windows version. None if it's not running on Windows '''
1889 if compat_os_name == 'nt':
1890 return version_tuple(platform.win32_ver()[1])
1891 else:
1892 return None
1893
1894
1895 def write_string(s, out=None, encoding=None):
1896 assert isinstance(s, str)
1897 out = out or sys.stderr
1898
1899 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1900 s = re.sub(r'([\r\n]+)', r' \1', s)
1901
1902 if 'b' in getattr(out, 'mode', ''):
1903 byt = s.encode(encoding or preferredencoding(), 'ignore')
1904 out.write(byt)
1905 elif hasattr(out, 'buffer'):
1906 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1907 byt = s.encode(enc, 'ignore')
1908 out.buffer.write(byt)
1909 else:
1910 out.write(s)
1911 out.flush()
1912
1913
1914 def bytes_to_intlist(bs):
1915 if not bs:
1916 return []
1917 if isinstance(bs[0], int): # indexing a bytes object yields ints
1918 return list(bs)
1919 else:
1920 return [ord(c) for c in bs]
1921
1922
1923 def intlist_to_bytes(xs):
1924 if not xs:
1925 return b''
1926 return compat_struct_pack('%dB' % len(xs), *xs)
1927
1928
1929 class LockingUnsupportedError(IOError):
1930 msg = 'File locking is not supported on this platform'
1931
1932 def __init__(self):
1933 super().__init__(self.msg)
1934
1935
1936 # Cross-platform file locking
1937 if sys.platform == 'win32':
1938 import ctypes.wintypes
1939 import msvcrt
1940
1941 class OVERLAPPED(ctypes.Structure):
1942 _fields_ = [
1943 ('Internal', ctypes.wintypes.LPVOID),
1944 ('InternalHigh', ctypes.wintypes.LPVOID),
1945 ('Offset', ctypes.wintypes.DWORD),
1946 ('OffsetHigh', ctypes.wintypes.DWORD),
1947 ('hEvent', ctypes.wintypes.HANDLE),
1948 ]
1949
1950 kernel32 = ctypes.windll.kernel32
1951 LockFileEx = kernel32.LockFileEx
1952 LockFileEx.argtypes = [
1953 ctypes.wintypes.HANDLE, # hFile
1954 ctypes.wintypes.DWORD, # dwFlags
1955 ctypes.wintypes.DWORD, # dwReserved
1956 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1957 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1958 ctypes.POINTER(OVERLAPPED) # Overlapped
1959 ]
1960 LockFileEx.restype = ctypes.wintypes.BOOL
1961 UnlockFileEx = kernel32.UnlockFileEx
1962 UnlockFileEx.argtypes = [
1963 ctypes.wintypes.HANDLE, # hFile
1964 ctypes.wintypes.DWORD, # dwReserved
1965 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1966 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1967 ctypes.POINTER(OVERLAPPED) # Overlapped
1968 ]
1969 UnlockFileEx.restype = ctypes.wintypes.BOOL
1970 whole_low = 0xffffffff
1971 whole_high = 0x7fffffff
1972
1973 def _lock_file(f, exclusive, block):
1974 overlapped = OVERLAPPED()
1975 overlapped.Offset = 0
1976 overlapped.OffsetHigh = 0
1977 overlapped.hEvent = 0
1978 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
1979
1980 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
1981 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
1982 0, whole_low, whole_high, f._lock_file_overlapped_p):
1983 raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
1984
1985 def _unlock_file(f):
1986 assert f._lock_file_overlapped_p
1987 handle = msvcrt.get_osfhandle(f.fileno())
1988 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
1989 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1990
1991 else:
1992 try:
1993 import fcntl
1994
1995 def _lock_file(f, exclusive, block):
1996 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
1997 if not block:
1998 flags |= fcntl.LOCK_NB
1999 try:
2000 fcntl.flock(f, flags)
2001 except BlockingIOError:
2002 raise
2003 except OSError: # AOSP does not have flock()
2004 fcntl.lockf(f, flags)
2005
2006 def _unlock_file(f):
2007 try:
2008 fcntl.flock(f, fcntl.LOCK_UN)
2009 except OSError:
2010 fcntl.lockf(f, fcntl.LOCK_UN)
2011
2012 except ImportError:
2013
2014 def _lock_file(f, exclusive, block):
2015 raise LockingUnsupportedError()
2016
2017 def _unlock_file(f):
2018 raise LockingUnsupportedError()
2019
2020
2021 class locked_file:
2022 locked = False
2023
2024 def __init__(self, filename, mode, block=True, encoding=None):
2025 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2026 raise NotImplementedError(mode)
2027 self.mode, self.block = mode, block
2028
2029 writable = any(f in mode for f in 'wax+')
2030 readable = any(f in mode for f in 'r+')
2031 flags = functools.reduce(operator.ior, (
2032 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2033 getattr(os, 'O_BINARY', 0), # Windows only
2034 getattr(os, 'O_NOINHERIT', 0), # Windows only
2035 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2036 os.O_APPEND if 'a' in mode else 0,
2037 os.O_EXCL if 'x' in mode else 0,
2038 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2039 ))
2040
2041 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2042
2043 def __enter__(self):
2044 exclusive = 'r' not in self.mode
2045 try:
2046 _lock_file(self.f, exclusive, self.block)
2047 self.locked = True
2048 except OSError:
2049 self.f.close()
2050 raise
2051 if 'w' in self.mode:
2052 try:
2053 self.f.truncate()
2054 except OSError as e:
2055 if e.errno != 29: # Illegal seek, expected when self.f is a FIFO
2056 raise e
2057 return self
2058
2059 def unlock(self):
2060 if not self.locked:
2061 return
2062 try:
2063 _unlock_file(self.f)
2064 finally:
2065 self.locked = False
2066
2067 def __exit__(self, *_):
2068 try:
2069 self.unlock()
2070 finally:
2071 self.f.close()
2072
2073 open = __enter__
2074 close = __exit__
2075
2076 def __getattr__(self, attr):
2077 return getattr(self.f, attr)
2078
2079 def __iter__(self):
2080 return iter(self.f)
2081
2082
2083 @functools.cache
2084 def get_filesystem_encoding():
2085 encoding = sys.getfilesystemencoding()
2086 return encoding if encoding is not None else 'utf-8'
2087
2088
2089 def shell_quote(args):
2090 quoted_args = []
2091 encoding = get_filesystem_encoding()
2092 for a in args:
2093 if isinstance(a, bytes):
2094 # We may get a filename encoded with 'encodeFilename'
2095 a = a.decode(encoding)
2096 quoted_args.append(compat_shlex_quote(a))
2097 return ' '.join(quoted_args)
2098
2099
2100 def smuggle_url(url, data):
2101 """ Pass additional data in a URL for internal use. """
2102
2103 url, idata = unsmuggle_url(url, {})
2104 data.update(idata)
2105 sdata = compat_urllib_parse_urlencode(
2106 {'__youtubedl_smuggle': json.dumps(data)})
2107 return url + '#' + sdata
2108
2109
2110 def unsmuggle_url(smug_url, default=None):
2111 if '#__youtubedl_smuggle' not in smug_url:
2112 return smug_url, default
2113 url, _, sdata = smug_url.rpartition('#')
2114 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2115 data = json.loads(jsond)
2116 return url, data
2117
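# Example (illustrative): smuggle_url/unsmuggle_url round-trip data through
# the URL fragment:
#   >>> unsmuggle_url(smuggle_url('http://example.com/video', {'k': 'v'}))
#   ('http://example.com/video', {'k': 'v'})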
2118
2119 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2120 """ Formats numbers with decimal suffixes like K, M, etc. """
2121 num, factor = float_or_none(num), float(factor)
2122 if num is None or num < 0:
2123 return None
2124 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2125 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2126 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2127 if factor == 1024:
2128 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2129 converted = num / (factor ** exponent)
2130 return fmt % (converted, suffix)
2131
2132
2133 def format_bytes(bytes):
2134 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2135
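# Example (illustrative): with factor=1024 the binary (Ki/Mi/...) suffixes are used:
#   >>> format_bytes(1024 ** 2)
#   '1.00MiB'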
2136
2137 def lookup_unit_table(unit_table, s):
2138 units_re = '|'.join(re.escape(u) for u in unit_table)
2139 m = re.match(
2140 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2141 if not m:
2142 return None
2143 num_str = m.group('num').replace(',', '.')
2144 mult = unit_table[m.group('unit')]
2145 return int(float(num_str) * mult)
2146
2147
2148 def parse_filesize(s):
2149 if s is None:
2150 return None
2151
2152 # The lower-case forms are of course incorrect and unofficial,
2153 # but we support those too
2154 _UNIT_TABLE = {
2155 'B': 1,
2156 'b': 1,
2157 'bytes': 1,
2158 'KiB': 1024,
2159 'KB': 1000,
2160 'kB': 1024,
2161 'Kb': 1000,
2162 'kb': 1000,
2163 'kilobytes': 1000,
2164 'kibibytes': 1024,
2165 'MiB': 1024 ** 2,
2166 'MB': 1000 ** 2,
2167 'mB': 1024 ** 2,
2168 'Mb': 1000 ** 2,
2169 'mb': 1000 ** 2,
2170 'megabytes': 1000 ** 2,
2171 'mebibytes': 1024 ** 2,
2172 'GiB': 1024 ** 3,
2173 'GB': 1000 ** 3,
2174 'gB': 1024 ** 3,
2175 'Gb': 1000 ** 3,
2176 'gb': 1000 ** 3,
2177 'gigabytes': 1000 ** 3,
2178 'gibibytes': 1024 ** 3,
2179 'TiB': 1024 ** 4,
2180 'TB': 1000 ** 4,
2181 'tB': 1024 ** 4,
2182 'Tb': 1000 ** 4,
2183 'tb': 1000 ** 4,
2184 'terabytes': 1000 ** 4,
2185 'tebibytes': 1024 ** 4,
2186 'PiB': 1024 ** 5,
2187 'PB': 1000 ** 5,
2188 'pB': 1024 ** 5,
2189 'Pb': 1000 ** 5,
2190 'pb': 1000 ** 5,
2191 'petabytes': 1000 ** 5,
2192 'pebibytes': 1024 ** 5,
2193 'EiB': 1024 ** 6,
2194 'EB': 1000 ** 6,
2195 'eB': 1024 ** 6,
2196 'Eb': 1000 ** 6,
2197 'eb': 1000 ** 6,
2198 'exabytes': 1000 ** 6,
2199 'exbibytes': 1024 ** 6,
2200 'ZiB': 1024 ** 7,
2201 'ZB': 1000 ** 7,
2202 'zB': 1024 ** 7,
2203 'Zb': 1000 ** 7,
2204 'zb': 1000 ** 7,
2205 'zettabytes': 1000 ** 7,
2206 'zebibytes': 1024 ** 7,
2207 'YiB': 1024 ** 8,
2208 'YB': 1000 ** 8,
2209 'yB': 1024 ** 8,
2210 'Yb': 1000 ** 8,
2211 'yb': 1000 ** 8,
2212 'yottabytes': 1000 ** 8,
2213 'yobibytes': 1024 ** 8,
2214 }
2215
2216 return lookup_unit_table(_UNIT_TABLE, s)
2217
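# Example (illustrative): binary and decimal units are distinguished:
#   >>> parse_filesize('10 MiB')
#   10485760
#   >>> parse_filesize('10 MB')
#   10000000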
2218
2219 def parse_count(s):
2220 if s is None:
2221 return None
2222
2223 s = re.sub(r'^[^\d]+\s', '', s).strip()
2224
2225 if re.match(r'^[\d,.]+$', s):
2226 return str_to_int(s)
2227
2228 _UNIT_TABLE = {
2229 'k': 1000,
2230 'K': 1000,
2231 'm': 1000 ** 2,
2232 'M': 1000 ** 2,
2233 'kk': 1000 ** 2,
2234 'KK': 1000 ** 2,
2235 'b': 1000 ** 3,
2236 'B': 1000 ** 3,
2237 }
2238
2239 ret = lookup_unit_table(_UNIT_TABLE, s)
2240 if ret is not None:
2241 return ret
2242
2243 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2244 if mobj:
2245 return str_to_int(mobj.group(1))
2246
2247
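# Example (illustrative):
#   >>> parse_count('1,385')
#   1385
#   >>> parse_count('1.1M')
#   1100000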
2248 def parse_resolution(s, *, lenient=False):
2249 if s is None:
2250 return {}
2251
2252 if lenient:
2253 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2254 else:
2255 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2256 if mobj:
2257 return {
2258 'width': int(mobj.group('w')),
2259 'height': int(mobj.group('h')),
2260 }
2261
2262 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2263 if mobj:
2264 return {'height': int(mobj.group(1))}
2265
2266 mobj = re.search(r'\b([48])[kK]\b', s)
2267 if mobj:
2268 return {'height': int(mobj.group(1)) * 540}
2269
2270 return {}
2271
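# Example (illustrative):
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}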
2272
2273 def parse_bitrate(s):
2274 if not isinstance(s, compat_str):
2275 return
2276 mobj = re.search(r'\b(\d+)\s*kbps', s)
2277 if mobj:
2278 return int(mobj.group(1))
2279
2280
2281 def month_by_name(name, lang='en'):
2282 """ Return the number of a month given its name in the specified language (locale-independent) """
2283
2284 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2285
2286 try:
2287 return month_names.index(name) + 1
2288 except ValueError:
2289 return None
2290
2291
2292 def month_by_abbreviation(abbrev):
2293 """ Return the number of a month given its English
2294 three-letter abbreviation (locale-independent) """
2295
2296 try:
2297 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2298 except ValueError:
2299 return None
2300
2301
2302 def fix_xml_ampersands(xml_str):
2303 """Replace all unescaped '&' with '&amp;' in XML"""
2304 return re.sub(
2305 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2306 '&amp;',
2307 xml_str)
2308
2309
2310 def setproctitle(title):
2311 assert isinstance(title, compat_str)
2312
2313 # ctypes in Jython is not complete
2314 # http://bugs.jython.org/issue2148
2315 if sys.platform.startswith('java'):
2316 return
2317
2318 try:
2319 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2320 except OSError:
2321 return
2322 except TypeError:
2323 # LoadLibrary in Windows Python 2.7.13 only expects
2324 # a bytestring, but since unicode_literals turns
2325 # every string into a unicode string, it fails.
2326 return
2327 title_bytes = title.encode()
2328 buf = ctypes.create_string_buffer(len(title_bytes))
2329 buf.value = title_bytes
2330 try:
2331 libc.prctl(15, buf, 0, 0, 0)
2332 except AttributeError:
2333 return # Strange libc, just skip this
2334
2335
2336 def remove_start(s, start):
2337 return s[len(start):] if s is not None and s.startswith(start) else s
2338
2339
2340 def remove_end(s, end):
2341 return s[:-len(end)] if s is not None and s.endswith(end) else s
2342
2343
2344 def remove_quotes(s):
2345 if s is None or len(s) < 2:
2346 return s
2347 for quote in ('"', "'", ):
2348 if s[0] == quote and s[-1] == quote:
2349 return s[1:-1]
2350 return s
2351
2352
2353 def get_domain(url):
2354 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2355 return domain.group('domain') if domain else None
2356
2357
2358 def url_basename(url):
2359 path = compat_urlparse.urlparse(url).path
2360 return path.strip('/').split('/')[-1]
2361
2362
2363 def base_url(url):
2364 return re.match(r'https?://[^?#&]+/', url).group()
2365
2366
2367 def urljoin(base, path):
2368 if isinstance(path, bytes):
2369 path = path.decode()
2370 if not isinstance(path, compat_str) or not path:
2371 return None
2372 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2373 return path
2374 if isinstance(base, bytes):
2375 base = base.decode()
2376 if not isinstance(base, compat_str) or not re.match(
2377 r'^(?:https?:)?//', base):
2378 return None
2379 return compat_urlparse.urljoin(base, path)
2380
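# Example (illustrative): urljoin() returns None instead of raising on
# invalid inputs:
#   >>> urljoin('http://foo.de/', '/a/b/c.txt')
#   'http://foo.de/a/b/c.txt'
#   >>> urljoin('http://foo.de/', '') is None
#   True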
2381
2382 class HEADRequest(compat_urllib_request.Request):
2383 def get_method(self):
2384 return 'HEAD'
2385
2386
2387 class PUTRequest(compat_urllib_request.Request):
2388 def get_method(self):
2389 return 'PUT'
2390
2391
2392 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2393 if get_attr and v is not None:
2394 v = getattr(v, get_attr, None)
2395 try:
2396 return int(v) * invscale // scale
2397 except (ValueError, TypeError, OverflowError):
2398 return default
2399
2400
2401 def str_or_none(v, default=None):
2402 return default if v is None else compat_str(v)
2403
2404
2405 def str_to_int(int_str):
2406 """ A more relaxed version of int_or_none """
2407 if isinstance(int_str, int):
2408 return int_str
2409 elif isinstance(int_str, compat_str):
2410 int_str = re.sub(r'[,\.\+]', '', int_str)
2411 return int_or_none(int_str)
2412
2413
2414 def float_or_none(v, scale=1, invscale=1, default=None):
2415 if v is None:
2416 return default
2417 try:
2418 return float(v) * invscale / scale
2419 except (ValueError, TypeError):
2420 return default
2421
2422
2423 def bool_or_none(v, default=None):
2424 return v if isinstance(v, bool) else default
2425
2426
2427 def strip_or_none(v, default=None):
2428 return v.strip() if isinstance(v, compat_str) else default
2429
2430
2431 def url_or_none(url):
2432 if not url or not isinstance(url, compat_str):
2433 return None
2434 url = url.strip()
2435 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2436
2437
2438 def request_to_url(req):
2439 if isinstance(req, compat_urllib_request.Request):
2440 return req.get_full_url()
2441 else:
2442 return req
2443
2444
2445 def strftime_or_none(timestamp, date_format, default=None):
2446 datetime_object = None
2447 try:
2448 if isinstance(timestamp, (int, float)): # unix timestamp
2449 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2450 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2451 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2452 return datetime_object.strftime(date_format)
2453 except (ValueError, TypeError, AttributeError):
2454 return default
2455
2456
2457 def parse_duration(s):
2458 if not isinstance(s, str):
2459 return None
2460 s = s.strip()
2461 if not s:
2462 return None
2463
2464 days, hours, mins, secs, ms = [None] * 5
2465 m = re.match(r'''(?x)
2466 (?P<before_secs>
2467 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2468 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2469 (?P<ms>[.:][0-9]+)?Z?$
2470 ''', s)
2471 if m:
2472 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2473 else:
2474 m = re.match(
2475 r'''(?ix)(?:P?
2476 (?:
2477 [0-9]+\s*y(?:ears?)?,?\s*
2478 )?
2479 (?:
2480 [0-9]+\s*m(?:onths?)?,?\s*
2481 )?
2482 (?:
2483 [0-9]+\s*w(?:eeks?)?,?\s*
2484 )?
2485 (?:
2486 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2487 )?
2488 T)?
2489 (?:
2490 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2491 )?
2492 (?:
2493 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2494 )?
2495 (?:
2496 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2497 )?Z?$''', s)
2498 if m:
2499 days, hours, mins, secs, ms = m.groups()
2500 else:
2501 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2502 if m:
2503 hours, mins = m.groups()
2504 else:
2505 return None
2506
2507 if ms:
2508 ms = ms.replace(':', '.')
2509 return sum(float(part or 0) * mult for part, mult in (
2510 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2511
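# Example (illustrative): clock, natural-language and ISO 8601 forms are accepted:
#   >>> parse_duration('00:01:01')
#   61.0
#   >>> parse_duration('PT1H0.040S')
#   3600.04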
2512
2513 def prepend_extension(filename, ext, expected_real_ext=None):
2514 name, real_ext = os.path.splitext(filename)
2515 return (
2516 f'{name}.{ext}{real_ext}'
2517 if not expected_real_ext or real_ext[1:] == expected_real_ext
2518 else f'{filename}.{ext}')
2519
2520
2521 def replace_extension(filename, ext, expected_real_ext=None):
2522 name, real_ext = os.path.splitext(filename)
2523 return '{}.{}'.format(
2524 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2525 ext)
2526
2527
2528 def check_executable(exe, args=[]):
2529 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2530 args can be a list of arguments for a short output (like -version) """
2531 try:
2532 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
2533 except OSError:
2534 return False
2535 return exe
2536
2537
2538 def _get_exe_version_output(exe, args, *, to_screen=None):
2539 if to_screen:
2540 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2541 try:
2542 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2543 # SIGTTOU if yt-dlp is run in the background.
2544 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2545 out, _ = Popen(
2546 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
2547 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
2548 except OSError:
2549 return False
2550 if isinstance(out, bytes): # output is bytes since text mode is not enabled
2551 out = out.decode('ascii', 'ignore')
2552 return out
2553
2554
2555 def detect_exe_version(output, version_re=None, unrecognized='present'):
2556 assert isinstance(output, compat_str)
2557 if version_re is None:
2558 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2559 m = re.search(version_re, output)
2560 if m:
2561 return m.group(1)
2562 else:
2563 return unrecognized
2564
2565
2566 def get_exe_version(exe, args=['--version'],
2567 version_re=None, unrecognized='present'):
2568 """ Returns the version of the specified executable,
2569 or False if the executable is not present """
2570 out = _get_exe_version_output(exe, args)
2571 return detect_exe_version(out, version_re, unrecognized) if out else False
2572
2573
2574 class LazyList(collections.abc.Sequence):
2575 """Lazy immutable list from an iterable
2576 Note that slices of a LazyList are lists and not LazyList"""
2577
2578 class IndexError(IndexError):
2579 pass
2580
2581 def __init__(self, iterable, *, reverse=False, _cache=None):
2582 self._iterable = iter(iterable)
2583 self._cache = [] if _cache is None else _cache
2584 self._reversed = reverse
2585
2586 def __iter__(self):
2587 if self._reversed:
2588 # We need to consume the entire iterable to iterate in reverse
2589 yield from self.exhaust()
2590 return
2591 yield from self._cache
2592 for item in self._iterable:
2593 self._cache.append(item)
2594 yield item
2595
2596 def _exhaust(self):
2597 self._cache.extend(self._iterable)
2598 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2599 return self._cache
2600
2601 def exhaust(self):
2602 """Evaluate the entire iterable"""
2603 return self._exhaust()[::-1 if self._reversed else 1]
2604
2605 @staticmethod
2606 def _reverse_index(x):
2607 return None if x is None else -(x + 1)
2608
2609 def __getitem__(self, idx):
2610 if isinstance(idx, slice):
2611 if self._reversed:
2612 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2613 start, stop, step = idx.start, idx.stop, idx.step or 1
2614 elif isinstance(idx, int):
2615 if self._reversed:
2616 idx = self._reverse_index(idx)
2617 start, stop, step = idx, idx, 0
2618 else:
2619 raise TypeError('indices must be integers or slices')
2620 if ((start or 0) < 0 or (stop or 0) < 0
2621 or (start is None and step < 0)
2622 or (stop is None and step > 0)):
2623 # We need to consume the entire iterable to be able to slice from the end
2624 # Obviously, never use this with infinite iterables
2625 self._exhaust()
2626 try:
2627 return self._cache[idx]
2628 except IndexError as e:
2629 raise self.IndexError(e) from e
2630 n = max(start or 0, stop or 0) - len(self._cache) + 1
2631 if n > 0:
2632 self._cache.extend(itertools.islice(self._iterable, n))
2633 try:
2634 return self._cache[idx]
2635 except IndexError as e:
2636 raise self.IndexError(e) from e
2637
2638 def __bool__(self):
2639 try:
2640 self[-1] if self._reversed else self[0]
2641 except self.IndexError:
2642 return False
2643 return True
2644
2645 def __len__(self):
2646 self._exhaust()
2647 return len(self._cache)
2648
2649 def __reversed__(self):
2650 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2651
2652 def __copy__(self):
2653 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2654
2655 def __repr__(self):
2656 # repr and str should mimic a list. So we exhaust the iterable
2657 return repr(self.exhaust())
2658
2659 def __str__(self):
2660 return repr(self.exhaust())
2661
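# Example (illustrative): only as much of the iterable is consumed as the
# accessed indices require, so even infinite iterators are safe to index:
#   >>> lst = LazyList(itertools.count())
#   >>> lst[5]  # consumes only the first six items
#   5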
2662
2663 class PagedList:
2664
2665 class IndexError(IndexError):
2666 pass
2667
2668 def __len__(self):
2669 # This is only useful for tests
2670 return len(self.getslice())
2671
2672 def __init__(self, pagefunc, pagesize, use_cache=True):
2673 self._pagefunc = pagefunc
2674 self._pagesize = pagesize
2675 self._pagecount = float('inf')
2676 self._use_cache = use_cache
2677 self._cache = {}
2678
2679 def getpage(self, pagenum):
2680 page_results = self._cache.get(pagenum)
2681 if page_results is None:
2682 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2683 if self._use_cache:
2684 self._cache[pagenum] = page_results
2685 return page_results
2686
2687 def getslice(self, start=0, end=None):
2688 return list(self._getslice(start, end))
2689
2690 def _getslice(self, start, end):
2691 raise NotImplementedError('This method must be implemented by subclasses')
2692
2693 def __getitem__(self, idx):
2694 assert self._use_cache, 'Indexing PagedList requires cache'
2695 if not isinstance(idx, int) or idx < 0:
2696 raise TypeError('indices must be non-negative integers')
2697 entries = self.getslice(idx, idx + 1)
2698 if not entries:
2699 raise self.IndexError()
2700 return entries[0]
2701
2702
2703 class OnDemandPagedList(PagedList):
2704 """Download pages until a page with fewer than the maximum number of results"""
2705
2706 def _getslice(self, start, end):
2707 for pagenum in itertools.count(start // self._pagesize):
2708 firstid = pagenum * self._pagesize
2709 nextfirstid = pagenum * self._pagesize + self._pagesize
2710 if start >= nextfirstid:
2711 continue
2712
2713 startv = (
2714 start % self._pagesize
2715 if firstid <= start < nextfirstid
2716 else 0)
2717 endv = (
2718 ((end - 1) % self._pagesize) + 1
2719 if (end is not None and firstid <= end <= nextfirstid)
2720 else None)
2721
2722 try:
2723 page_results = self.getpage(pagenum)
2724 except Exception:
2725 self._pagecount = pagenum - 1
2726 raise
2727 if startv != 0 or endv is not None:
2728 page_results = page_results[startv:endv]
2729 yield from page_results
2730
2731 # A little optimization: if the current page is not "full", i.e. does
2732 # not contain page_size videos, then we can assume that this page
2733 # is the last one - there are no more ids on further pages -
2734 # so there is no need to query again.
2735 if len(page_results) + startv < self._pagesize:
2736 break
2737
2738 # If we got the whole page, but the next page is not interesting,
2739 # break out early as well
2740 if end == nextfirstid:
2741 break
2742
2743
2744 class InAdvancePagedList(PagedList):
2745 """PagedList with total number of pages known in advance"""
2746
2747 def __init__(self, pagefunc, pagecount, pagesize):
2748 PagedList.__init__(self, pagefunc, pagesize, True)
2749 self._pagecount = pagecount
2750
2751 def _getslice(self, start, end):
2752 start_page = start // self._pagesize
2753 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2754 skip_elems = start - start_page * self._pagesize
2755 only_more = None if end is None else end - start
2756 for pagenum in range(start_page, end_page):
2757 page_results = self.getpage(pagenum)
2758 if skip_elems:
2759 page_results = page_results[skip_elems:]
2760 skip_elems = None
2761 if only_more is not None:
2762 if len(page_results) < only_more:
2763 only_more -= len(page_results)
2764 else:
2765 yield from page_results[:only_more]
2766 break
2767 yield from page_results
2768
2769
2770 def uppercase_escape(s):
2771 unicode_escape = codecs.getdecoder('unicode_escape')
2772 return re.sub(
2773 r'\\U[0-9a-fA-F]{8}',
2774 lambda m: unicode_escape(m.group(0))[0],
2775 s)
2776
2777
2778 def lowercase_escape(s):
2779 unicode_escape = codecs.getdecoder('unicode_escape')
2780 return re.sub(
2781 r'\\u[0-9a-fA-F]{4}',
2782 lambda m: unicode_escape(m.group(0))[0],
2783 s)
2784
2785
2786 def escape_rfc3986(s):
2787 """Escape non-ASCII characters as suggested by RFC 3986"""
2788 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2789
2790
2791 def escape_url(url):
2792 """Escape URL as suggested by RFC 3986"""
2793 url_parsed = compat_urllib_parse_urlparse(url)
2794 return url_parsed._replace(
2795 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2796 path=escape_rfc3986(url_parsed.path),
2797 params=escape_rfc3986(url_parsed.params),
2798 query=escape_rfc3986(url_parsed.query),
2799 fragment=escape_rfc3986(url_parsed.fragment)
2800 ).geturl()
2801
2802
2803 def parse_qs(url):
2804 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2805
2806
2807 def read_batch_urls(batch_fd):
2808 def fixup(url):
2809 if not isinstance(url, compat_str):
2810 url = url.decode('utf-8', 'replace')
2811 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2812 for bom in BOM_UTF8:
2813 if url.startswith(bom):
2814 url = url[len(bom):]
2815 url = url.lstrip()
2816 if not url or url.startswith(('#', ';', ']')):
2817 return False
2818 # "#" cannot be stripped out since it is part of the URI
2819 # However, it can be safely stripped out if following a whitespace
2820 return re.split(r'\s#', url, 1)[0].rstrip()
2821
2822 with contextlib.closing(batch_fd) as fd:
2823 return [url for url in map(fixup, fd) if url]
2824
2825
2826 def urlencode_postdata(*args, **kargs):
2827 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
2828
2829
2830 def update_url_query(url, query):
2831 if not query:
2832 return url
2833 parsed_url = compat_urlparse.urlparse(url)
2834 qs = compat_parse_qs(parsed_url.query)
2835 qs.update(query)
2836 return compat_urlparse.urlunparse(parsed_url._replace(
2837 query=compat_urllib_parse_urlencode(qs, True)))
2838
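# Example (illustrative): the given parameters are merged into the URL's query string:
#   >>> update_url_query('http://example.com/path', {'format': 'mp4'})
#   'http://example.com/path?format=mp4'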
2839
2840 def update_Request(req, url=None, data=None, headers={}, query={}):
2841 req_headers = req.headers.copy()
2842 req_headers.update(headers)
2843 req_data = data or req.data
2844 req_url = update_url_query(url or req.get_full_url(), query)
2845 req_get_method = req.get_method()
2846 if req_get_method == 'HEAD':
2847 req_type = HEADRequest
2848 elif req_get_method == 'PUT':
2849 req_type = PUTRequest
2850 else:
2851 req_type = compat_urllib_request.Request
2852 new_req = req_type(
2853 req_url, data=req_data, headers=req_headers,
2854 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2855 if hasattr(req, 'timeout'):
2856 new_req.timeout = req.timeout
2857 return new_req
2858
2859
2860 def _multipart_encode_impl(data, boundary):
2861 content_type = 'multipart/form-data; boundary=%s' % boundary
2862
2863 out = b''
2864 for k, v in data.items():
2865 out += b'--' + boundary.encode('ascii') + b'\r\n'
2866 if isinstance(k, compat_str):
2867 k = k.encode()
2868 if isinstance(v, compat_str):
2869 v = v.encode()
2870 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2871 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2872 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2873 if boundary.encode('ascii') in content:
2874 raise ValueError('Boundary overlaps with data')
2875 out += content
2876
2877 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2878
2879 return out, content_type
2880
2881
2882 def multipart_encode(data, boundary=None):
2883 '''
2884 Encode a dict to RFC 7578-compliant form-data
2885
2886 data:
2887 A dict where keys and values can be either Unicode or bytes-like
2888 objects.
2889 boundary:
2890 If specified, a Unicode object to use as the boundary. Otherwise
2891 a random boundary is generated.
2892
2893 Reference: https://tools.ietf.org/html/rfc7578
2894 '''
2895 has_specified_boundary = boundary is not None
2896
2897 while True:
2898 if boundary is None:
2899 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
2900
2901 try:
2902 out, content_type = _multipart_encode_impl(data, boundary)
2903 break
2904 except ValueError:
2905 if has_specified_boundary:
2906 raise
2907 boundary = None
2908
2909 return out, content_type
2910
2911
2912 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
2913 for val in map(d.get, variadic(key_or_keys)):
2914 if val is not None and (val or not skip_false_values):
2915 return val
2916 return default
2917
2918
2919 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
2920 for f in funcs:
2921 try:
2922 val = f(*args, **kwargs)
2923 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
2924 pass
2925 else:
2926 if expected_type is None or isinstance(val, expected_type):
2927 return val
2928
2929
2930 def try_get(src, getter, expected_type=None):
2931 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
2932
2933
2934 def filter_dict(dct, cndn=lambda _, v: v is not None):
2935 return {k: v for k, v in dct.items() if cndn(k, v)}
2936
2937
2938 def merge_dicts(*dicts):
2939 merged = {}
2940 for a_dict in dicts:
2941 for k, v in a_dict.items():
2942 if (v is not None and k not in merged
2943 or isinstance(v, str) and merged[k] == ''):
2944 merged[k] = v
2945 return merged
2946
2947
2948 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2949 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2950
2951
2952 US_RATINGS = {
2953 'G': 0,
2954 'PG': 10,
2955 'PG-13': 13,
2956 'R': 16,
2957 'NC': 18,
2958 }
2959
2960
2961 TV_PARENTAL_GUIDELINES = {
2962 'TV-Y': 0,
2963 'TV-Y7': 7,
2964 'TV-G': 0,
2965 'TV-PG': 0,
2966 'TV-14': 14,
2967 'TV-MA': 17,
2968 }
2969
2970
2971 def parse_age_limit(s):
2972 # isinstance(False, int) is True. So type() must be used instead
2973 if type(s) is int:
2974 return s if 0 <= s <= 21 else None
2975 elif not isinstance(s, str):
2976 return None
2977 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
2978 if m:
2979 return int(m.group('age'))
2980 s = s.upper()
2981 if s in US_RATINGS:
2982 return US_RATINGS[s]
2983 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
2984 if m:
2985 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
2986 return None
2987
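# Example (illustrative):
#   >>> parse_age_limit('PG-13')
#   13
#   >>> parse_age_limit('TV-MA')
#   17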
2988
2989 def strip_jsonp(code):
2990 return re.sub(
2991 r'''(?sx)^
2992 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2993 (?:\s*&&\s*(?P=func_name))?
2994 \s*\(\s*(?P<callback_data>.*)\);?
2995 \s*?(?://[^\n]*)*$''',
2996 r'\g<callback_data>', code)
2997
2998
2999 def js_to_json(code, vars={}):
3000 # vars is a dict of var, val pairs to substitute
3001 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3002 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3003 INTEGER_TABLE = (
3004 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3005 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3006 )
3007
3008 def fix_kv(m):
3009 v = m.group(0)
3010 if v in ('true', 'false', 'null'):
3011 return v
3012 elif v in ('undefined', 'void 0'):
3013 return 'null'
3014 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3015 return ""
3016
3017 if v[0] in ("'", '"'):
3018 v = re.sub(r'(?s)\\.|"', lambda m: {
3019 '"': '\\"',
3020 "\\'": "'",
3021 '\\\n': '',
3022 '\\x': '\\u00',
3023 }.get(m.group(0), m.group(0)), v[1:-1])
3024 else:
3025 for regex, base in INTEGER_TABLE:
3026 im = re.match(regex, v)
3027 if im:
3028 i = int(im.group(1), base)
3029 return '"%d":' % i if v.endswith(':') else '%d' % i
3030
3031 if v in vars:
3032 return vars[v]
3033
3034 return '"%s"' % v
3035
3036 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3037
3038 return re.sub(r'''(?sx)
3039 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3040 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3041 {comment}|,(?={skip}[\]}}])|
3042 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3043 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3044 [0-9]+(?={skip}:)|
3045 !+
3046 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3047
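# Example (illustrative): unquoted keys, single quotes and JS literals are
# converted so the result can be fed to json.loads():
#   >>> json.loads(js_to_json("{abc: 'def', extra: true}"))
#   {'abc': 'def', 'extra': True}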
3048
3049 def qualities(quality_ids):
3050 """ Get a numeric quality value out of a list of possible values """
3051 def q(qid):
3052 try:
3053 return quality_ids.index(qid)
3054 except ValueError:
3055 return -1
3056 return q
3057
3058
3059 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
3060
3061
3062 DEFAULT_OUTTMPL = {
3063 'default': '%(title)s [%(id)s].%(ext)s',
3064 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3065 }
3066 OUTTMPL_TYPES = {
3067 'chapter': None,
3068 'subtitle': None,
3069 'thumbnail': None,
3070 'description': 'description',
3071 'annotation': 'annotations.xml',
3072 'infojson': 'info.json',
3073 'link': None,
3074 'pl_video': None,
3075 'pl_thumbnail': None,
3076 'pl_description': 'description',
3077 'pl_infojson': 'info.json',
3078 }
3079
3080 # As of [1] format syntax is:
3081 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3082 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3083 STR_FORMAT_RE_TMPL = r'''(?x)
3084 (?<!%)(?P<prefix>(?:%%)*)
3085 %
3086 (?P<has_key>\((?P<key>{0})\))?
3087 (?P<format>
3088 (?P<conversion>[#0\-+ ]+)?
3089 (?P<min_width>\d+)?
3090 (?P<precision>\.\d+)?
3091 (?P<len_mod>[hlL])? # unused in python
3092 {1} # conversion type
3093 )
3094 '''
3095
3096
3097 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3098
3099
3100 def limit_length(s, length):
3101 """ Add ellipses to overly long strings """
3102 if s is None:
3103 return None
3104 ELLIPSES = '...'
3105 if len(s) > length:
3106 return s[:length - len(ELLIPSES)] + ELLIPSES
3107 return s
3108
3109
3110 def version_tuple(v):
3111 return tuple(int(e) for e in re.split(r'[-.]', v))
3112
3113
3114 def is_outdated_version(version, limit, assume_new=True):
3115 if not version:
3116 return not assume_new
3117 try:
3118 return version_tuple(version) < version_tuple(limit)
3119 except ValueError:
3120 return not assume_new
3121
3122
3123 def ytdl_is_updateable():
3124 """ Returns whether yt-dlp can be updated with -U """
3125
3126 from .update import is_non_updateable
3127
3128 return not is_non_updateable()
3129
3130
3131 def args_to_str(args):
3132 # Get a short string representation for a subprocess command
3133 return ' '.join(compat_shlex_quote(a) for a in args)
3134
3135
3136 def error_to_compat_str(err):
3137 return str(err)
3138
3139
3140 def error_to_str(err):
3141 return f'{type(err).__name__}: {err}'
3142
3143
3144 def mimetype2ext(mt):
3145 if mt is None:
3146 return None
3147
3148 mt, _, params = mt.partition(';')
3149 mt = mt.strip()
3150
3151 FULL_MAP = {
3152 'audio/mp4': 'm4a',
3153 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
3154 # it's the most popular one
3155 'audio/mpeg': 'mp3',
3156 'audio/x-wav': 'wav',
3157 'audio/wav': 'wav',
3158 'audio/wave': 'wav',
3159 }
3160
3161 ext = FULL_MAP.get(mt)
3162 if ext is not None:
3163 return ext
3164
3165 SUBTYPE_MAP = {
3166 '3gpp': '3gp',
3167 'smptett+xml': 'tt',
3168 'ttaf+xml': 'dfxp',
3169 'ttml+xml': 'ttml',
3170 'x-flv': 'flv',
3171 'x-mp4-fragmented': 'mp4',
3172 'x-ms-sami': 'sami',
3173 'x-ms-wmv': 'wmv',
3174 'mpegurl': 'm3u8',
3175 'x-mpegurl': 'm3u8',
3176 'vnd.apple.mpegurl': 'm3u8',
3177 'dash+xml': 'mpd',
3178 'f4m+xml': 'f4m',
3179 'hds+xml': 'f4m',
3180 'vnd.ms-sstr+xml': 'ism',
3181 'quicktime': 'mov',
3182 'mp2t': 'ts',
3183 'x-wav': 'wav',
3184 'filmstrip+json': 'fs',
3185 'svg+xml': 'svg',
3186 }
3187
3188 _, _, subtype = mt.rpartition('/')
3189 ext = SUBTYPE_MAP.get(subtype.lower())
3190 if ext is not None:
3191 return ext
3192
3193 SUFFIX_MAP = {
3194 'json': 'json',
3195 'xml': 'xml',
3196 'zip': 'zip',
3197 'gzip': 'gz',
3198 }
3199
3200 _, _, suffix = subtype.partition('+')
3201 ext = SUFFIX_MAP.get(suffix)
3202 if ext is not None:
3203 return ext
3204
3205 return subtype.replace('+', '.')
3206
3207
3208 def ext2mimetype(ext_or_url):
3209 if not ext_or_url:
3210 return None
3211 if '.' not in ext_or_url:
3212 ext_or_url = f'file.{ext_or_url}'
3213 return mimetypes.guess_type(ext_or_url)[0]
3214
3215
3216 def parse_codecs(codecs_str):
3217 # http://tools.ietf.org/html/rfc6381
3218 if not codecs_str:
3219 return {}
3220 split_codecs = list(filter(None, map(
3221 str.strip, codecs_str.strip().strip(',').split(','))))
3222 vcodec, acodec, scodec, hdr = None, None, None, None
3223 for full_codec in split_codecs:
3224 parts = full_codec.split('.')
3225 codec = parts[0].replace('0', '')
3226 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3227 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3228 if not vcodec:
3229 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3230 if codec in ('dvh1', 'dvhe'):
3231 hdr = 'DV'
3232 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3233 hdr = 'HDR10'
3234 elif full_codec.replace('0', '').startswith('vp9.2'):
3235 hdr = 'HDR10'
3236 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3237 if not acodec:
3238 acodec = full_codec
3239 elif codec in ('stpp', 'wvtt',):
3240 if not scodec:
3241 scodec = full_codec
3242 else:
3243 write_string(f'WARNING: Unknown codec {full_codec}\n')
3244 if vcodec or acodec or scodec:
3245 return {
3246 'vcodec': vcodec or 'none',
3247 'acodec': acodec or 'none',
3248 'dynamic_range': hdr,
3249 **({'scodec': scodec} if scodec is not None else {}),
3250 }
3251 elif len(split_codecs) == 2:
3252 return {
3253 'vcodec': split_codecs[0],
3254 'acodec': split_codecs[1],
3255 }
3256 return {}
3257
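# Example (illustrative, output shape per the function above):
#   >>> parse_codecs('avc1.77.30, mp4a.40.2')
#   {'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', 'dynamic_range': None}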
3258
3259 def urlhandle_detect_ext(url_handle):
3260 getheader = url_handle.headers.get
3261
3262 cd = getheader('Content-Disposition')
3263 if cd:
3264 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3265 if m:
3266 e = determine_ext(m.group('filename'), default_ext=None)
3267 if e:
3268 return e
3269
3270 return mimetype2ext(getheader('Content-Type'))
3271
3272
3273 def encode_data_uri(data, mime_type):
3274 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3275
3276
3277 def age_restricted(content_limit, age_limit):
3278 """ Returns True iff the content should be blocked """
3279
3280 if age_limit is None: # No limit set
3281 return False
3282 if content_limit is None:
3283 return False # Content available for everyone
3284 return age_limit < content_limit
3285
3286
3287 def is_html(first_bytes):
3288 """ Detect whether a file contains HTML by examining its first bytes. """
3289
3290 BOMS = [
3291 (b'\xef\xbb\xbf', 'utf-8'),
3292 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3293 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3294 (b'\xff\xfe', 'utf-16-le'),
3295 (b'\xfe\xff', 'utf-16-be'),
3296 ]
3297
3298 encoding = 'utf-8'
3299 for bom, enc in BOMS:
3300 while first_bytes.startswith(bom):
3301 encoding, first_bytes = enc, first_bytes[len(bom):]
3302
3303 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3304
3305
3306 def determine_protocol(info_dict):
3307 protocol = info_dict.get('protocol')
3308 if protocol is not None:
3309 return protocol
3310
3311 url = sanitize_url(info_dict['url'])
3312 if url.startswith('rtmp'):
3313 return 'rtmp'
3314 elif url.startswith('mms'):
3315 return 'mms'
3316 elif url.startswith('rtsp'):
3317 return 'rtsp'
3318
3319 ext = determine_ext(url)
3320 if ext == 'm3u8':
3321 return 'm3u8'
3322 elif ext == 'f4m':
3323 return 'f4m'
3324
3325 return compat_urllib_parse_urlparse(url).scheme
3326
3327
3328 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3329 """ Render a list of rows, each as a list of values.
3330 Text after a \t will be right aligned """
3331 def width(string):
3332 return len(remove_terminal_sequences(string).replace('\t', ''))
3333
3334 def get_max_lens(table):
3335 return [max(width(str(v)) for v in col) for col in zip(*table)]
3336
3337 def filter_using_list(row, filterArray):
3338 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3339
3340 max_lens = get_max_lens(data) if hide_empty else []
3341 header_row = filter_using_list(header_row, max_lens)
3342 data = [filter_using_list(row, max_lens) for row in data]
3343
3344 table = [header_row] + data
3345 max_lens = get_max_lens(table)
3346 extra_gap += 1
3347 if delim:
3348 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3349 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3350 for row in table:
3351 for pos, text in enumerate(map(str, row)):
3352 if '\t' in text:
3353 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3354 else:
3355 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3356 ret = '\n'.join(''.join(row).rstrip() for row in table)
3357 return ret
3358
3359
3360 def _match_one(filter_part, dct, incomplete):
3361 # TODO: Generalize code with YoutubeDL._build_format_filter
3362 STRING_OPERATORS = {
3363 '*=': operator.contains,
3364 '^=': lambda attr, value: attr.startswith(value),
3365 '$=': lambda attr, value: attr.endswith(value),
3366 '~=': lambda attr, value: re.search(value, attr),
3367 }
3368 COMPARISON_OPERATORS = {
3369 **STRING_OPERATORS,
3370 '<=': operator.le, # "<=" must be defined above "<"
3371 '<': operator.lt,
3372 '>=': operator.ge,
3373 '>': operator.gt,
3374 '=': operator.eq,
3375 }
3376
3377 if isinstance(incomplete, bool):
3378 is_incomplete = lambda _: incomplete
3379 else:
3380 is_incomplete = lambda k: k in incomplete
3381
3382 operator_rex = re.compile(r'''(?x)\s*
3383 (?P<key>[a-z_]+)
3384 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3385 (?:
3386 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3387 (?P<strval>.+?)
3388 )
3389 \s*$
3390 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3391 m = operator_rex.search(filter_part)
3392 if m:
3393 m = m.groupdict()
3394 unnegated_op = COMPARISON_OPERATORS[m['op']]
3395 if m['negation']:
3396 op = lambda attr, value: not unnegated_op(attr, value)
3397 else:
3398 op = unnegated_op
3399 comparison_value = m['quotedstrval'] or m['strval']
3400 if m['quote']:
3401 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3402 actual_value = dct.get(m['key'])
3403 numeric_comparison = None
3404 if isinstance(actual_value, (int, float)):
3405 # If the original field is a string and the matching comparison
3406 # value is a number, we should respect the origin of the original
3407 # field and process the comparison value as a string (see
3408 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3409 try:
3410 numeric_comparison = int(comparison_value)
3411 except ValueError:
3412 numeric_comparison = parse_filesize(comparison_value)
3413 if numeric_comparison is None:
3414 numeric_comparison = parse_filesize(f'{comparison_value}B')
3415 if numeric_comparison is None:
3416 numeric_comparison = parse_duration(comparison_value)
3417 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3418 raise ValueError('Operator %s only supports string values!' % m['op'])
3419 if actual_value is None:
3420 return is_incomplete(m['key']) or m['none_inclusive']
3421 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3422
3423 UNARY_OPERATORS = {
3424 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3425 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3426 }
3427 operator_rex = re.compile(r'''(?x)\s*
3428 (?P<op>%s)\s*(?P<key>[a-z_]+)
3429 \s*$
3430 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3431 m = operator_rex.search(filter_part)
3432 if m:
3433 op = UNARY_OPERATORS[m.group('op')]
3434 actual_value = dct.get(m.group('key'))
3435 if is_incomplete(m.group('key')) and actual_value is None:
3436 return True
3437 return op(actual_value)
3438
3439 raise ValueError('Invalid filter part %r' % filter_part)
3440
3441
3442 def match_str(filter_str, dct, incomplete=False):
3443 """ Filter a dictionary with a simple string syntax.
3444 @returns Whether the filter passes
3445 @param incomplete Set of keys that is expected to be missing from dct.
3446 Can be True/False to indicate all/none of the keys may be missing.
3447 All conditions on incomplete keys pass if the key is missing
3448 """
3449 return all(
3450 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3451 for filter_part in re.split(r'(?<!\\)&', filter_str))
3452
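# Example (illustrative): '&' separates conditions; '?' makes a condition
# pass when the key is missing:
#   >>> match_str('like_count > 100 & dislike_count <? 50',
#   ...           {'like_count': 190, 'dislike_count': 10})
#   True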
3453
3454 def match_filter_func(filters):
3455 if not filters:
3456 return None
3457 filters = set(variadic(filters))
3458
3459 interactive = '-' in filters
3460 if interactive:
3461 filters.remove('-')
3462
3463 def _match_func(info_dict, incomplete=False):
3464 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3465 return NO_DEFAULT if interactive and not incomplete else None
3466 else:
3467 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3468 filter_str = ') | ('.join(map(str.strip, filters))
3469 return f'{video_title} does not pass filter ({filter_str}), skipping ...'
3470 return _match_func
3471
3472
3473 def parse_dfxp_time_expr(time_expr):
3474 if not time_expr:
3475 return
3476
3477 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3478 if mobj:
3479 return float(mobj.group('time_offset'))
3480
3481 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3482 if mobj:
3483 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3484
3485
3486 def srt_subtitles_timecode(seconds):
3487 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3488
3489
3490 def ass_subtitles_timecode(seconds):
3491 time = timetuple_from_msec(seconds * 1000)
3492 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3493
3494
3495 def dfxp2srt(dfxp_data):
3496 '''
3497 @param dfxp_data A bytes-like object containing DFXP data
3498 @returns A unicode object containing converted SRT data
3499 '''
3500 LEGACY_NAMESPACES = (
3501 (b'http://www.w3.org/ns/ttml', [
3502 b'http://www.w3.org/2004/11/ttaf1',
3503 b'http://www.w3.org/2006/04/ttaf1',
3504 b'http://www.w3.org/2006/10/ttaf1',
3505 ]),
3506 (b'http://www.w3.org/ns/ttml#styling', [
3507 b'http://www.w3.org/ns/ttml#style',
3508 ]),
3509 )
3510
3511 SUPPORTED_STYLING = [
3512 'color',
3513 'fontFamily',
3514 'fontSize',
3515 'fontStyle',
3516 'fontWeight',
3517 'textDecoration'
3518 ]
3519
3520 _x = functools.partial(xpath_with_ns, ns_map={
3521 'xml': 'http://www.w3.org/XML/1998/namespace',
3522 'ttml': 'http://www.w3.org/ns/ttml',
3523 'tts': 'http://www.w3.org/ns/ttml#styling',
3524 })
3525
3526 styles = {}
3527 default_style = {}
3528
3529 class TTMLPElementParser:
3530 def __init__(self):
3531 self._out = ''
3532 self._unclosed_elements, self._applied_styles = [], []  # per-instance, not shared class attributes
3533
3534 def start(self, tag, attrib):
3535 if tag in (_x('ttml:br'), 'br'):
3536 self._out += '\n'
3537 else:
3538 unclosed_elements = []
3539 style = {}
3540 element_style_id = attrib.get('style')
3541 if default_style:
3542 style.update(default_style)
3543 if element_style_id:
3544 style.update(styles.get(element_style_id, {}))
3545 for prop in SUPPORTED_STYLING:
3546 prop_val = attrib.get(_x('tts:' + prop))
3547 if prop_val:
3548 style[prop] = prop_val
3549 if style:
3550 font = ''
3551 for k, v in sorted(style.items()):
3552 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3553 continue
3554 if k == 'color':
3555 font += ' color="%s"' % v
3556 elif k == 'fontSize':
3557 font += ' size="%s"' % v
3558 elif k == 'fontFamily':
3559 font += ' face="%s"' % v
3560 elif k == 'fontWeight' and v == 'bold':
3561 self._out += '<b>'
3562 unclosed_elements.append('b')
3563 elif k == 'fontStyle' and v == 'italic':
3564 self._out += '<i>'
3565 unclosed_elements.append('i')
3566 elif k == 'textDecoration' and v == 'underline':
3567 self._out += '<u>'
3568 unclosed_elements.append('u')
3569 if font:
3570 self._out += '<font' + font + '>'
3571 unclosed_elements.append('font')
3572 applied_style = {}
3573 if self._applied_styles:
3574 applied_style.update(self._applied_styles[-1])
3575 applied_style.update(style)
3576 self._applied_styles.append(applied_style)
3577 self._unclosed_elements.append(unclosed_elements)
3578
3579 def end(self, tag):
3580 if tag not in (_x('ttml:br'), 'br'):
3581 unclosed_elements = self._unclosed_elements.pop()
3582 for element in reversed(unclosed_elements):
3583 self._out += '</%s>' % element
3584 if unclosed_elements and self._applied_styles:
3585 self._applied_styles.pop()
3586
3587 def data(self, data):
3588 self._out += data
3589
3590 def close(self):
3591 return self._out.strip()
3592
3593 def parse_node(node):
3594 target = TTMLPElementParser()
3595 parser = xml.etree.ElementTree.XMLParser(target=target)
3596 parser.feed(xml.etree.ElementTree.tostring(node))
3597 return parser.close()
3598
3599 for k, v in LEGACY_NAMESPACES:
3600 for ns in v:
3601 dfxp_data = dfxp_data.replace(ns, k)
3602
3603 dfxp = compat_etree_fromstring(dfxp_data)
3604 out = []
3605 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3606
3607 if not paras:
3608 raise ValueError('Invalid dfxp/TTML subtitle')
3609
3610 repeat = False
3611 while True:
3612 for style in dfxp.findall(_x('.//ttml:style')):
3613 style_id = style.get('id') or style.get(_x('xml:id'))
3614 if not style_id:
3615 continue
3616 parent_style_id = style.get('style')
3617 if parent_style_id:
3618 if parent_style_id not in styles:
3619 repeat = True
3620 continue
3621 styles[style_id] = styles[parent_style_id].copy()
3622 for prop in SUPPORTED_STYLING:
3623 prop_val = style.get(_x('tts:' + prop))
3624 if prop_val:
3625 styles.setdefault(style_id, {})[prop] = prop_val
3626 if repeat:
3627 repeat = False
3628 else:
3629 break
3630
3631 for p in ('body', 'div'):
3632 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3633 if ele is None:
3634 continue
3635 style = styles.get(ele.get('style'))
3636 if not style:
3637 continue
3638 default_style.update(style)
3639
3640 for para, index in zip(paras, itertools.count(1)):
3641 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3642 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3643 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3644 if begin_time is None:
3645 continue
3646 if not end_time:
3647 if not dur:
3648 continue
3649 end_time = begin_time + dur
3650 out.append('%d\n%s --> %s\n%s\n\n' % (
3651 index,
3652 srt_subtitles_timecode(begin_time),
3653 srt_subtitles_timecode(end_time),
3654 parse_node(para)))
3655
3656 return ''.join(out)
3657
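# A minimal usage sketch (the TTML input below is hypothetical):
#   >>> dfxp2srt(b'<tt xmlns="http://www.w3.org/ns/ttml"><body><div>'
#   ...          b'<p begin="0s" end="1s">Hello</p></div></body></tt>')
#   '1\n00:00:00,000 --> 00:00:01,000\nHello\n\n'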
3658
3659 def cli_option(params, command_option, param):
3660 param = params.get(param)
3661 if param is not None:
3662 param = compat_str(param)
3663 return [command_option, param] if param is not None else []
3664
3665
3666 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3667 param = params.get(param)
3668 if param is None:
3669 return []
3670 assert isinstance(param, bool)
3671 if separator:
3672 return [command_option + separator + (true_value if param else false_value)]
3673 return [command_option, true_value if param else false_value]
3674
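# Illustrative examples (the option and parameter names are hypothetical):
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']
#   >>> cli_bool_option({'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
#   ['--check-certificate=true']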
3675
3676 def cli_valueless_option(params, command_option, param, expected_value=True):
3677 param = params.get(param)
3678 return [command_option] if param == expected_value else []
3679
3680
3681 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3682 if isinstance(argdict, (list, tuple)): # for backward compatibility
3683 if use_compat:
3684 return argdict
3685 else:
3686 argdict = None
3687 if argdict is None:
3688 return default
3689 assert isinstance(argdict, dict)
3690
3691 assert isinstance(keys, (list, tuple))
3692 for key_list in keys:
3693 arg_list = list(filter(
3694 lambda x: x is not None,
3695 [argdict.get(key.lower()) for key in variadic(key_list)]))
3696 if arg_list:
3697 return [arg for args in arg_list for arg in args]
3698 return default
3699
3700
3701 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3702 main_key, exe = main_key.lower(), exe.lower()
3703 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3704 keys = [f'{root_key}{k}' for k in (keys or [''])]
3705 if root_key in keys:
3706 if main_key != exe:
3707 keys.append((main_key, exe))
3708 keys.append('default')
3709 else:
3710 use_compat = False
3711 return cli_configuration_args(argdict, keys, default, use_compat)
3712
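# A sketch of the key resolution (the main_key/exe values are hypothetical):
# with main_key='youtube' and exe='ffmpeg' the lookup order is
# 'youtube+ffmpeg', then ('youtube', 'ffmpeg') combined, then 'default':
#   >>> _configuration_args('youtube', {'youtube+ffmpeg': ['-v']}, 'ffmpeg')
#   ['-v']
#   >>> _configuration_args('youtube', {'default': ['-nostats']}, 'ffmpeg')
#   ['-nostats']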
3713
3714 class ISO639Utils:
3715 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3716 _lang_map = {
3717 'aa': 'aar',
3718 'ab': 'abk',
3719 'ae': 'ave',
3720 'af': 'afr',
3721 'ak': 'aka',
3722 'am': 'amh',
3723 'an': 'arg',
3724 'ar': 'ara',
3725 'as': 'asm',
3726 'av': 'ava',
3727 'ay': 'aym',
3728 'az': 'aze',
3729 'ba': 'bak',
3730 'be': 'bel',
3731 'bg': 'bul',
3732 'bh': 'bih',
3733 'bi': 'bis',
3734 'bm': 'bam',
3735 'bn': 'ben',
3736 'bo': 'bod',
3737 'br': 'bre',
3738 'bs': 'bos',
3739 'ca': 'cat',
3740 'ce': 'che',
3741 'ch': 'cha',
3742 'co': 'cos',
3743 'cr': 'cre',
3744 'cs': 'ces',
3745 'cu': 'chu',
3746 'cv': 'chv',
3747 'cy': 'cym',
3748 'da': 'dan',
3749 'de': 'deu',
3750 'dv': 'div',
3751 'dz': 'dzo',
3752 'ee': 'ewe',
3753 'el': 'ell',
3754 'en': 'eng',
3755 'eo': 'epo',
3756 'es': 'spa',
3757 'et': 'est',
3758 'eu': 'eus',
3759 'fa': 'fas',
3760 'ff': 'ful',
3761 'fi': 'fin',
3762 'fj': 'fij',
3763 'fo': 'fao',
3764 'fr': 'fra',
3765 'fy': 'fry',
3766 'ga': 'gle',
3767 'gd': 'gla',
3768 'gl': 'glg',
3769 'gn': 'grn',
3770 'gu': 'guj',
3771 'gv': 'glv',
3772 'ha': 'hau',
3773 'he': 'heb',
3774 'iw': 'heb', # Replaced by he in 1989 revision
3775 'hi': 'hin',
3776 'ho': 'hmo',
3777 'hr': 'hrv',
3778 'ht': 'hat',
3779 'hu': 'hun',
3780 'hy': 'hye',
3781 'hz': 'her',
3782 'ia': 'ina',
3783 'id': 'ind',
3784 'in': 'ind', # Replaced by id in 1989 revision
3785 'ie': 'ile',
3786 'ig': 'ibo',
3787 'ii': 'iii',
3788 'ik': 'ipk',
3789 'io': 'ido',
3790 'is': 'isl',
3791 'it': 'ita',
3792 'iu': 'iku',
3793 'ja': 'jpn',
3794 'jv': 'jav',
3795 'ka': 'kat',
3796 'kg': 'kon',
3797 'ki': 'kik',
3798 'kj': 'kua',
3799 'kk': 'kaz',
3800 'kl': 'kal',
3801 'km': 'khm',
3802 'kn': 'kan',
3803 'ko': 'kor',
3804 'kr': 'kau',
3805 'ks': 'kas',
3806 'ku': 'kur',
3807 'kv': 'kom',
3808 'kw': 'cor',
3809 'ky': 'kir',
3810 'la': 'lat',
3811 'lb': 'ltz',
3812 'lg': 'lug',
3813 'li': 'lim',
3814 'ln': 'lin',
3815 'lo': 'lao',
3816 'lt': 'lit',
3817 'lu': 'lub',
3818 'lv': 'lav',
3819 'mg': 'mlg',
3820 'mh': 'mah',
3821 'mi': 'mri',
3822 'mk': 'mkd',
3823 'ml': 'mal',
3824 'mn': 'mon',
3825 'mr': 'mar',
3826 'ms': 'msa',
3827 'mt': 'mlt',
3828 'my': 'mya',
3829 'na': 'nau',
3830 'nb': 'nob',
3831 'nd': 'nde',
3832 'ne': 'nep',
3833 'ng': 'ndo',
3834 'nl': 'nld',
3835 'nn': 'nno',
3836 'no': 'nor',
3837 'nr': 'nbl',
3838 'nv': 'nav',
3839 'ny': 'nya',
3840 'oc': 'oci',
3841 'oj': 'oji',
3842 'om': 'orm',
3843 'or': 'ori',
3844 'os': 'oss',
3845 'pa': 'pan',
3846 'pi': 'pli',
3847 'pl': 'pol',
3848 'ps': 'pus',
3849 'pt': 'por',
3850 'qu': 'que',
3851 'rm': 'roh',
3852 'rn': 'run',
3853 'ro': 'ron',
3854 'ru': 'rus',
3855 'rw': 'kin',
3856 'sa': 'san',
3857 'sc': 'srd',
3858 'sd': 'snd',
3859 'se': 'sme',
3860 'sg': 'sag',
3861 'si': 'sin',
3862 'sk': 'slk',
3863 'sl': 'slv',
3864 'sm': 'smo',
3865 'sn': 'sna',
3866 'so': 'som',
3867 'sq': 'sqi',
3868 'sr': 'srp',
3869 'ss': 'ssw',
3870 'st': 'sot',
3871 'su': 'sun',
3872 'sv': 'swe',
3873 'sw': 'swa',
3874 'ta': 'tam',
3875 'te': 'tel',
3876 'tg': 'tgk',
3877 'th': 'tha',
3878 'ti': 'tir',
3879 'tk': 'tuk',
3880 'tl': 'tgl',
3881 'tn': 'tsn',
3882 'to': 'ton',
3883 'tr': 'tur',
3884 'ts': 'tso',
3885 'tt': 'tat',
3886 'tw': 'twi',
3887 'ty': 'tah',
3888 'ug': 'uig',
3889 'uk': 'ukr',
3890 'ur': 'urd',
3891 'uz': 'uzb',
3892 've': 'ven',
3893 'vi': 'vie',
3894 'vo': 'vol',
3895 'wa': 'wln',
3896 'wo': 'wol',
3897 'xh': 'xho',
3898 'yi': 'yid',
3899 'ji': 'yid', # Replaced by yi in 1989 revision
3900 'yo': 'yor',
3901 'za': 'zha',
3902 'zh': 'zho',
3903 'zu': 'zul',
3904 }
3905
3906 @classmethod
3907 def short2long(cls, code):
3908 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3909 return cls._lang_map.get(code[:2])
3910
3911 @classmethod
3912 def long2short(cls, code):
3913 """Convert language code from ISO 639-2/T to ISO 639-1"""
3914 for short_name, long_name in cls._lang_map.items():
3915 if long_name == code:
3916 return short_name
3917
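# Illustrative conversions between the two code sets:
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'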
3918
3919 class ISO3166Utils:
3920 # From http://data.okfn.org/data/core/country-list
3921 _country_map = {
3922 'AF': 'Afghanistan',
3923 'AX': 'Åland Islands',
3924 'AL': 'Albania',
3925 'DZ': 'Algeria',
3926 'AS': 'American Samoa',
3927 'AD': 'Andorra',
3928 'AO': 'Angola',
3929 'AI': 'Anguilla',
3930 'AQ': 'Antarctica',
3931 'AG': 'Antigua and Barbuda',
3932 'AR': 'Argentina',
3933 'AM': 'Armenia',
3934 'AW': 'Aruba',
3935 'AU': 'Australia',
3936 'AT': 'Austria',
3937 'AZ': 'Azerbaijan',
3938 'BS': 'Bahamas',
3939 'BH': 'Bahrain',
3940 'BD': 'Bangladesh',
3941 'BB': 'Barbados',
3942 'BY': 'Belarus',
3943 'BE': 'Belgium',
3944 'BZ': 'Belize',
3945 'BJ': 'Benin',
3946 'BM': 'Bermuda',
3947 'BT': 'Bhutan',
3948 'BO': 'Bolivia, Plurinational State of',
3949 'BQ': 'Bonaire, Sint Eustatius and Saba',
3950 'BA': 'Bosnia and Herzegovina',
3951 'BW': 'Botswana',
3952 'BV': 'Bouvet Island',
3953 'BR': 'Brazil',
3954 'IO': 'British Indian Ocean Territory',
3955 'BN': 'Brunei Darussalam',
3956 'BG': 'Bulgaria',
3957 'BF': 'Burkina Faso',
3958 'BI': 'Burundi',
3959 'KH': 'Cambodia',
3960 'CM': 'Cameroon',
3961 'CA': 'Canada',
3962 'CV': 'Cape Verde',
3963 'KY': 'Cayman Islands',
3964 'CF': 'Central African Republic',
3965 'TD': 'Chad',
3966 'CL': 'Chile',
3967 'CN': 'China',
3968 'CX': 'Christmas Island',
3969 'CC': 'Cocos (Keeling) Islands',
3970 'CO': 'Colombia',
3971 'KM': 'Comoros',
3972 'CG': 'Congo',
3973 'CD': 'Congo, the Democratic Republic of the',
3974 'CK': 'Cook Islands',
3975 'CR': 'Costa Rica',
3976 'CI': 'Côte d\'Ivoire',
3977 'HR': 'Croatia',
3978 'CU': 'Cuba',
3979 'CW': 'Curaçao',
3980 'CY': 'Cyprus',
3981 'CZ': 'Czech Republic',
3982 'DK': 'Denmark',
3983 'DJ': 'Djibouti',
3984 'DM': 'Dominica',
3985 'DO': 'Dominican Republic',
3986 'EC': 'Ecuador',
3987 'EG': 'Egypt',
3988 'SV': 'El Salvador',
3989 'GQ': 'Equatorial Guinea',
3990 'ER': 'Eritrea',
3991 'EE': 'Estonia',
3992 'ET': 'Ethiopia',
3993 'FK': 'Falkland Islands (Malvinas)',
3994 'FO': 'Faroe Islands',
3995 'FJ': 'Fiji',
3996 'FI': 'Finland',
3997 'FR': 'France',
3998 'GF': 'French Guiana',
3999 'PF': 'French Polynesia',
4000 'TF': 'French Southern Territories',
4001 'GA': 'Gabon',
4002 'GM': 'Gambia',
4003 'GE': 'Georgia',
4004 'DE': 'Germany',
4005 'GH': 'Ghana',
4006 'GI': 'Gibraltar',
4007 'GR': 'Greece',
4008 'GL': 'Greenland',
4009 'GD': 'Grenada',
4010 'GP': 'Guadeloupe',
4011 'GU': 'Guam',
4012 'GT': 'Guatemala',
4013 'GG': 'Guernsey',
4014 'GN': 'Guinea',
4015 'GW': 'Guinea-Bissau',
4016 'GY': 'Guyana',
4017 'HT': 'Haiti',
4018 'HM': 'Heard Island and McDonald Islands',
4019 'VA': 'Holy See (Vatican City State)',
4020 'HN': 'Honduras',
4021 'HK': 'Hong Kong',
4022 'HU': 'Hungary',
4023 'IS': 'Iceland',
4024 'IN': 'India',
4025 'ID': 'Indonesia',
4026 'IR': 'Iran, Islamic Republic of',
4027 'IQ': 'Iraq',
4028 'IE': 'Ireland',
4029 'IM': 'Isle of Man',
4030 'IL': 'Israel',
4031 'IT': 'Italy',
4032 'JM': 'Jamaica',
4033 'JP': 'Japan',
4034 'JE': 'Jersey',
4035 'JO': 'Jordan',
4036 'KZ': 'Kazakhstan',
4037 'KE': 'Kenya',
4038 'KI': 'Kiribati',
4039 'KP': 'Korea, Democratic People\'s Republic of',
4040 'KR': 'Korea, Republic of',
4041 'KW': 'Kuwait',
4042 'KG': 'Kyrgyzstan',
4043 'LA': 'Lao People\'s Democratic Republic',
4044 'LV': 'Latvia',
4045 'LB': 'Lebanon',
4046 'LS': 'Lesotho',
4047 'LR': 'Liberia',
4048 'LY': 'Libya',
4049 'LI': 'Liechtenstein',
4050 'LT': 'Lithuania',
4051 'LU': 'Luxembourg',
4052 'MO': 'Macao',
4053 'MK': 'Macedonia, the Former Yugoslav Republic of',
4054 'MG': 'Madagascar',
4055 'MW': 'Malawi',
4056 'MY': 'Malaysia',
4057 'MV': 'Maldives',
4058 'ML': 'Mali',
4059 'MT': 'Malta',
4060 'MH': 'Marshall Islands',
4061 'MQ': 'Martinique',
4062 'MR': 'Mauritania',
4063 'MU': 'Mauritius',
4064 'YT': 'Mayotte',
4065 'MX': 'Mexico',
4066 'FM': 'Micronesia, Federated States of',
4067 'MD': 'Moldova, Republic of',
4068 'MC': 'Monaco',
4069 'MN': 'Mongolia',
4070 'ME': 'Montenegro',
4071 'MS': 'Montserrat',
4072 'MA': 'Morocco',
4073 'MZ': 'Mozambique',
4074 'MM': 'Myanmar',
4075 'NA': 'Namibia',
4076 'NR': 'Nauru',
4077 'NP': 'Nepal',
4078 'NL': 'Netherlands',
4079 'NC': 'New Caledonia',
4080 'NZ': 'New Zealand',
4081 'NI': 'Nicaragua',
4082 'NE': 'Niger',
4083 'NG': 'Nigeria',
4084 'NU': 'Niue',
4085 'NF': 'Norfolk Island',
4086 'MP': 'Northern Mariana Islands',
4087 'NO': 'Norway',
4088 'OM': 'Oman',
4089 'PK': 'Pakistan',
4090 'PW': 'Palau',
4091 'PS': 'Palestine, State of',
4092 'PA': 'Panama',
4093 'PG': 'Papua New Guinea',
4094 'PY': 'Paraguay',
4095 'PE': 'Peru',
4096 'PH': 'Philippines',
4097 'PN': 'Pitcairn',
4098 'PL': 'Poland',
4099 'PT': 'Portugal',
4100 'PR': 'Puerto Rico',
4101 'QA': 'Qatar',
4102 'RE': 'Réunion',
4103 'RO': 'Romania',
4104 'RU': 'Russian Federation',
4105 'RW': 'Rwanda',
4106 'BL': 'Saint Barthélemy',
4107 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4108 'KN': 'Saint Kitts and Nevis',
4109 'LC': 'Saint Lucia',
4110 'MF': 'Saint Martin (French part)',
4111 'PM': 'Saint Pierre and Miquelon',
4112 'VC': 'Saint Vincent and the Grenadines',
4113 'WS': 'Samoa',
4114 'SM': 'San Marino',
4115 'ST': 'Sao Tome and Principe',
4116 'SA': 'Saudi Arabia',
4117 'SN': 'Senegal',
4118 'RS': 'Serbia',
4119 'SC': 'Seychelles',
4120 'SL': 'Sierra Leone',
4121 'SG': 'Singapore',
4122 'SX': 'Sint Maarten (Dutch part)',
4123 'SK': 'Slovakia',
4124 'SI': 'Slovenia',
4125 'SB': 'Solomon Islands',
4126 'SO': 'Somalia',
4127 'ZA': 'South Africa',
4128 'GS': 'South Georgia and the South Sandwich Islands',
4129 'SS': 'South Sudan',
4130 'ES': 'Spain',
4131 'LK': 'Sri Lanka',
4132 'SD': 'Sudan',
4133 'SR': 'Suriname',
4134 'SJ': 'Svalbard and Jan Mayen',
4135 'SZ': 'Swaziland',
4136 'SE': 'Sweden',
4137 'CH': 'Switzerland',
4138 'SY': 'Syrian Arab Republic',
4139 'TW': 'Taiwan, Province of China',
4140 'TJ': 'Tajikistan',
4141 'TZ': 'Tanzania, United Republic of',
4142 'TH': 'Thailand',
4143 'TL': 'Timor-Leste',
4144 'TG': 'Togo',
4145 'TK': 'Tokelau',
4146 'TO': 'Tonga',
4147 'TT': 'Trinidad and Tobago',
4148 'TN': 'Tunisia',
4149 'TR': 'Turkey',
4150 'TM': 'Turkmenistan',
4151 'TC': 'Turks and Caicos Islands',
4152 'TV': 'Tuvalu',
4153 'UG': 'Uganda',
4154 'UA': 'Ukraine',
4155 'AE': 'United Arab Emirates',
4156 'GB': 'United Kingdom',
4157 'US': 'United States',
4158 'UM': 'United States Minor Outlying Islands',
4159 'UY': 'Uruguay',
4160 'UZ': 'Uzbekistan',
4161 'VU': 'Vanuatu',
4162 'VE': 'Venezuela, Bolivarian Republic of',
4163 'VN': 'Viet Nam',
4164 'VG': 'Virgin Islands, British',
4165 'VI': 'Virgin Islands, U.S.',
4166 'WF': 'Wallis and Futuna',
4167 'EH': 'Western Sahara',
4168 'YE': 'Yemen',
4169 'ZM': 'Zambia',
4170 'ZW': 'Zimbabwe',
4171 # Not ISO 3166 codes, but used for IP blocks
4172 'AP': 'Asia/Pacific Region',
4173 'EU': 'Europe',
4174 }
4175
4176 @classmethod
4177 def short2full(cls, code):
4178 """Convert an ISO 3166-2 country code to the corresponding full name"""
4179 return cls._country_map.get(code.upper())
4180
4181
4182 class GeoUtils:
4183 # Major IPv4 address blocks per country
4184 _country_ip_map = {
4185 'AD': '46.172.224.0/19',
4186 'AE': '94.200.0.0/13',
4187 'AF': '149.54.0.0/17',
4188 'AG': '209.59.64.0/18',
4189 'AI': '204.14.248.0/21',
4190 'AL': '46.99.0.0/16',
4191 'AM': '46.70.0.0/15',
4192 'AO': '105.168.0.0/13',
4193 'AP': '182.50.184.0/21',
4194 'AQ': '23.154.160.0/24',
4195 'AR': '181.0.0.0/12',
4196 'AS': '202.70.112.0/20',
4197 'AT': '77.116.0.0/14',
4198 'AU': '1.128.0.0/11',
4199 'AW': '181.41.0.0/18',
4200 'AX': '185.217.4.0/22',
4201 'AZ': '5.197.0.0/16',
4202 'BA': '31.176.128.0/17',
4203 'BB': '65.48.128.0/17',
4204 'BD': '114.130.0.0/16',
4205 'BE': '57.0.0.0/8',
4206 'BF': '102.178.0.0/15',
4207 'BG': '95.42.0.0/15',
4208 'BH': '37.131.0.0/17',
4209 'BI': '154.117.192.0/18',
4210 'BJ': '137.255.0.0/16',
4211 'BL': '185.212.72.0/23',
4212 'BM': '196.12.64.0/18',
4213 'BN': '156.31.0.0/16',
4214 'BO': '161.56.0.0/16',
4215 'BQ': '161.0.80.0/20',
4216 'BR': '191.128.0.0/12',
4217 'BS': '24.51.64.0/18',
4218 'BT': '119.2.96.0/19',
4219 'BW': '168.167.0.0/16',
4220 'BY': '178.120.0.0/13',
4221 'BZ': '179.42.192.0/18',
4222 'CA': '99.224.0.0/11',
4223 'CD': '41.243.0.0/16',
4224 'CF': '197.242.176.0/21',
4225 'CG': '160.113.0.0/16',
4226 'CH': '85.0.0.0/13',
4227 'CI': '102.136.0.0/14',
4228 'CK': '202.65.32.0/19',
4229 'CL': '152.172.0.0/14',
4230 'CM': '102.244.0.0/14',
4231 'CN': '36.128.0.0/10',
4232 'CO': '181.240.0.0/12',
4233 'CR': '201.192.0.0/12',
4234 'CU': '152.206.0.0/15',
4235 'CV': '165.90.96.0/19',
4236 'CW': '190.88.128.0/17',
4237 'CY': '31.153.0.0/16',
4238 'CZ': '88.100.0.0/14',
4239 'DE': '53.0.0.0/8',
4240 'DJ': '197.241.0.0/17',
4241 'DK': '87.48.0.0/12',
4242 'DM': '192.243.48.0/20',
4243 'DO': '152.166.0.0/15',
4244 'DZ': '41.96.0.0/12',
4245 'EC': '186.68.0.0/15',
4246 'EE': '90.190.0.0/15',
4247 'EG': '156.160.0.0/11',
4248 'ER': '196.200.96.0/20',
4249 'ES': '88.0.0.0/11',
4250 'ET': '196.188.0.0/14',
4251 'EU': '2.16.0.0/13',
4252 'FI': '91.152.0.0/13',
4253 'FJ': '144.120.0.0/16',
4254 'FK': '80.73.208.0/21',
4255 'FM': '119.252.112.0/20',
4256 'FO': '88.85.32.0/19',
4257 'FR': '90.0.0.0/9',
4258 'GA': '41.158.0.0/15',
4259 'GB': '25.0.0.0/8',
4260 'GD': '74.122.88.0/21',
4261 'GE': '31.146.0.0/16',
4262 'GF': '161.22.64.0/18',
4263 'GG': '62.68.160.0/19',
4264 'GH': '154.160.0.0/12',
4265 'GI': '95.164.0.0/16',
4266 'GL': '88.83.0.0/19',
4267 'GM': '160.182.0.0/15',
4268 'GN': '197.149.192.0/18',
4269 'GP': '104.250.0.0/19',
4270 'GQ': '105.235.224.0/20',
4271 'GR': '94.64.0.0/13',
4272 'GT': '168.234.0.0/16',
4273 'GU': '168.123.0.0/16',
4274 'GW': '197.214.80.0/20',
4275 'GY': '181.41.64.0/18',
4276 'HK': '113.252.0.0/14',
4277 'HN': '181.210.0.0/16',
4278 'HR': '93.136.0.0/13',
4279 'HT': '148.102.128.0/17',
4280 'HU': '84.0.0.0/14',
4281 'ID': '39.192.0.0/10',
4282 'IE': '87.32.0.0/12',
4283 'IL': '79.176.0.0/13',
4284 'IM': '5.62.80.0/20',
4285 'IN': '117.192.0.0/10',
4286 'IO': '203.83.48.0/21',
4287 'IQ': '37.236.0.0/14',
4288 'IR': '2.176.0.0/12',
4289 'IS': '82.221.0.0/16',
4290 'IT': '79.0.0.0/10',
4291 'JE': '87.244.64.0/18',
4292 'JM': '72.27.0.0/17',
4293 'JO': '176.29.0.0/16',
4294 'JP': '133.0.0.0/8',
4295 'KE': '105.48.0.0/12',
4296 'KG': '158.181.128.0/17',
4297 'KH': '36.37.128.0/17',
4298 'KI': '103.25.140.0/22',
4299 'KM': '197.255.224.0/20',
4300 'KN': '198.167.192.0/19',
4301 'KP': '175.45.176.0/22',
4302 'KR': '175.192.0.0/10',
4303 'KW': '37.36.0.0/14',
4304 'KY': '64.96.0.0/15',
4305 'KZ': '2.72.0.0/13',
4306 'LA': '115.84.64.0/18',
4307 'LB': '178.135.0.0/16',
4308 'LC': '24.92.144.0/20',
4309 'LI': '82.117.0.0/19',
4310 'LK': '112.134.0.0/15',
4311 'LR': '102.183.0.0/16',
4312 'LS': '129.232.0.0/17',
4313 'LT': '78.56.0.0/13',
4314 'LU': '188.42.0.0/16',
4315 'LV': '46.109.0.0/16',
4316 'LY': '41.252.0.0/14',
4317 'MA': '105.128.0.0/11',
4318 'MC': '88.209.64.0/18',
4319 'MD': '37.246.0.0/16',
4320 'ME': '178.175.0.0/17',
4321 'MF': '74.112.232.0/21',
4322 'MG': '154.126.0.0/17',
4323 'MH': '117.103.88.0/21',
4324 'MK': '77.28.0.0/15',
4325 'ML': '154.118.128.0/18',
4326 'MM': '37.111.0.0/17',
4327 'MN': '49.0.128.0/17',
4328 'MO': '60.246.0.0/16',
4329 'MP': '202.88.64.0/20',
4330 'MQ': '109.203.224.0/19',
4331 'MR': '41.188.64.0/18',
4332 'MS': '208.90.112.0/22',
4333 'MT': '46.11.0.0/16',
4334 'MU': '105.16.0.0/12',
4335 'MV': '27.114.128.0/18',
4336 'MW': '102.70.0.0/15',
4337 'MX': '187.192.0.0/11',
4338 'MY': '175.136.0.0/13',
4339 'MZ': '197.218.0.0/15',
4340 'NA': '41.182.0.0/16',
4341 'NC': '101.101.0.0/18',
4342 'NE': '197.214.0.0/18',
4343 'NF': '203.17.240.0/22',
4344 'NG': '105.112.0.0/12',
4345 'NI': '186.76.0.0/15',
4346 'NL': '145.96.0.0/11',
4347 'NO': '84.208.0.0/13',
4348 'NP': '36.252.0.0/15',
4349 'NR': '203.98.224.0/19',
4350 'NU': '49.156.48.0/22',
4351 'NZ': '49.224.0.0/14',
4352 'OM': '5.36.0.0/15',
4353 'PA': '186.72.0.0/15',
4354 'PE': '186.160.0.0/14',
4355 'PF': '123.50.64.0/18',
4356 'PG': '124.240.192.0/19',
4357 'PH': '49.144.0.0/13',
4358 'PK': '39.32.0.0/11',
4359 'PL': '83.0.0.0/11',
4360 'PM': '70.36.0.0/20',
4361 'PR': '66.50.0.0/16',
4362 'PS': '188.161.0.0/16',
4363 'PT': '85.240.0.0/13',
4364 'PW': '202.124.224.0/20',
4365 'PY': '181.120.0.0/14',
4366 'QA': '37.210.0.0/15',
4367 'RE': '102.35.0.0/16',
4368 'RO': '79.112.0.0/13',
4369 'RS': '93.86.0.0/15',
4370 'RU': '5.136.0.0/13',
4371 'RW': '41.186.0.0/16',
4372 'SA': '188.48.0.0/13',
4373 'SB': '202.1.160.0/19',
4374 'SC': '154.192.0.0/11',
4375 'SD': '102.120.0.0/13',
4376 'SE': '78.64.0.0/12',
4377 'SG': '8.128.0.0/10',
4378 'SI': '188.196.0.0/14',
4379 'SK': '78.98.0.0/15',
4380 'SL': '102.143.0.0/17',
4381 'SM': '89.186.32.0/19',
4382 'SN': '41.82.0.0/15',
4383 'SO': '154.115.192.0/18',
4384 'SR': '186.179.128.0/17',
4385 'SS': '105.235.208.0/21',
4386 'ST': '197.159.160.0/19',
4387 'SV': '168.243.0.0/16',
4388 'SX': '190.102.0.0/20',
4389 'SY': '5.0.0.0/16',
4390 'SZ': '41.84.224.0/19',
4391 'TC': '65.255.48.0/20',
4392 'TD': '154.68.128.0/19',
4393 'TG': '196.168.0.0/14',
4394 'TH': '171.96.0.0/13',
4395 'TJ': '85.9.128.0/18',
4396 'TK': '27.96.24.0/21',
4397 'TL': '180.189.160.0/20',
4398 'TM': '95.85.96.0/19',
4399 'TN': '197.0.0.0/11',
4400 'TO': '175.176.144.0/21',
4401 'TR': '78.160.0.0/11',
4402 'TT': '186.44.0.0/15',
4403 'TV': '202.2.96.0/19',
4404 'TW': '120.96.0.0/11',
4405 'TZ': '156.156.0.0/14',
4406 'UA': '37.52.0.0/14',
4407 'UG': '102.80.0.0/13',
4408 'US': '6.0.0.0/8',
4409 'UY': '167.56.0.0/13',
4410 'UZ': '84.54.64.0/18',
4411 'VA': '212.77.0.0/19',
4412 'VC': '207.191.240.0/21',
4413 'VE': '186.88.0.0/13',
4414 'VG': '66.81.192.0/20',
4415 'VI': '146.226.0.0/16',
4416 'VN': '14.160.0.0/11',
4417 'VU': '202.80.32.0/20',
4418 'WF': '117.20.32.0/21',
4419 'WS': '202.4.32.0/19',
4420 'YE': '134.35.0.0/16',
4421 'YT': '41.242.116.0/22',
4422 'ZA': '41.0.0.0/11',
4423 'ZM': '102.144.0.0/13',
4424 'ZW': '102.177.192.0/18',
4425 }
4426
4427 @classmethod
4428 def random_ipv4(cls, code_or_block):
4429 if len(code_or_block) == 2:
4430 block = cls._country_ip_map.get(code_or_block.upper())
4431 if not block:
4432 return None
4433 else:
4434 block = code_or_block
4435 addr, preflen = block.split('/')
4436 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4437 addr_max = addr_min | (0xffffffff >> int(preflen))
4438 return compat_str(socket.inet_ntoa(
4439 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4440
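# A sketch of the selection: addr_min keeps the network prefix, OR-ing in the
# shifted host mask yields addr_max, so randint() draws uniformly from the
# whole block. The output below is randomized and made up:
#   >>> GeoUtils.random_ipv4('GB')   # 'GB' maps to 25.0.0.0/8 above
#   '25.13.37.42'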
4441
4442 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4443 def __init__(self, proxies=None):
4444 # Set default handlers
4445 for type in ('http', 'https'):
4446 setattr(self, '%s_open' % type,
4447 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4448 meth(r, proxy, type))
4449 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4450
4451 def proxy_open(self, req, proxy, type):
4452 req_proxy = req.headers.get('Ytdl-request-proxy')
4453 if req_proxy is not None:
4454 proxy = req_proxy
4455 del req.headers['Ytdl-request-proxy']
4456
4457 if proxy == '__noproxy__':
4458 return None # No Proxy
4459 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4460 req.add_header('Ytdl-socks-proxy', proxy)
4461 # yt-dlp's http/https handlers wrap the socket with socks
4462 return None
4463 return compat_urllib_request.ProxyHandler.proxy_open(
4464 self, req, proxy, type)
4465
4466
4467 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4468 # released into Public Domain
4469 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4470
4471 def long_to_bytes(n, blocksize=0):
4472 """long_to_bytes(n:long, blocksize:int) : string
4473 Convert a long integer to a byte string.
4474
4475 If optional blocksize is given and greater than zero, pad the front of the
4476 byte string with binary zeros so that the length is a multiple of
4477 blocksize.
4478 """
4479 # after much testing, this algorithm was deemed to be the fastest
4480 s = b''
4481 n = int(n)
4482 while n > 0:
4483 s = compat_struct_pack('>I', n & 0xffffffff) + s
4484 n = n >> 32
4485 # strip off leading zeros
4486 for i in range(len(s)):
4487 if s[i] != b'\000'[0]:
4488 break
4489 else:
4490 # only happens when n == 0
4491 s = b'\000'
4492 i = 0
4493 s = s[i:]
4494 # add back some pad bytes. this could be done more efficiently w.r.t. the
4495 # de-padding being done above, but sigh...
4496 if blocksize > 0 and len(s) % blocksize:
4497 s = (blocksize - len(s) % blocksize) * b'\000' + s
4498 return s
4499
4500
4501 def bytes_to_long(s):
4502 """bytes_to_long(string) : long
4503 Convert a byte string to a long integer.
4504
4505 This is (essentially) the inverse of long_to_bytes().
4506 """
4507 acc = 0
4508 length = len(s)
4509 if length % 4:
4510 extra = (4 - length % 4)
4511 s = b'\000' * extra + s
4512 length = length + extra
4513 for i in range(0, length, 4):
4514 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4515 return acc
4516
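# Round-trip example (illustrative values):
#   >>> long_to_bytes(65537, blocksize=4)
#   b'\x00\x01\x00\x01'
#   >>> bytes_to_long(b'\x00\x01\x00\x01')
#   65537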
4517
4518 def ohdave_rsa_encrypt(data, exponent, modulus):
4519 '''
4520 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4521
4522 Input:
4523 data: data to encrypt, bytes-like object
4524 exponent, modulus: parameter e and N of RSA algorithm, both integer
4525 Output: hex string of encrypted data
4526
4527 Limitation: supports one block encryption only
4528 '''
4529
4530 payload = int(binascii.hexlify(data[::-1]), 16)
4531 encrypted = pow(payload, exponent, modulus)
4532 return '%x' % encrypted
4533
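# Toy example with tiny, insecure parameters (e=3, N=15), for illustration only:
#   >>> ohdave_rsa_encrypt(b'\x02', 3, 15)   # pow(2, 3, 15) == 8
#   '8'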
4534
4535 def pkcs1pad(data, length):
4536 """
4537 Padding input data with PKCS#1 scheme
4538
4539 @param {int[]} data input data
4540 @param {int} length target length
4541 @returns {int[]} padded data
4542 """
4543 if len(data) > length - 11:
4544 raise ValueError('Input data too long for PKCS#1 padding')
4545
4546 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 requires non-zero padding bytes
4547 return [0, 2] + pseudo_random + [0] + data
4548
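# Illustrative layout of a padded block (the input values are made up):
#   >>> padded = pkcs1pad([42], 16)
#   >>> padded[:2], padded[-2:], len(padded)
#   ([0, 2], [0, 42], 16)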
4549
4550 def encode_base_n(num, n, table=None):
4551 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4552 if not table:
4553 table = FULL_TABLE[:n]
4554
4555 if n > len(table):
4556 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4557
4558 if num == 0:
4559 return table[0]
4560
4561 ret = ''
4562 while num:
4563 ret = table[num % n] + ret
4564 num = num // n
4565 return ret
4566
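# Illustrative conversions:
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(5, 2, table='ab')   # custom digit table: binary 101 -> 'bab'
#   'bab'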
4567
4568 def decode_packed_codes(code):
4569 mobj = re.search(PACKED_CODES_RE, code)
4570 obfuscated_code, base, count, symbols = mobj.groups()
4571 base = int(base)
4572 count = int(count)
4573 symbols = symbols.split('|')
4574 symbol_table = {}
4575
4576 while count:
4577 count -= 1
4578 base_n_count = encode_base_n(count, base)
4579 symbol_table[base_n_count] = symbols[count] or base_n_count
4580
4581 return re.sub(
4582 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4583 obfuscated_code)
4584
4585
4586 def caesar(s, alphabet, shift):
4587 if shift == 0:
4588 return s
4589 l = len(alphabet)
4590 return ''.join(
4591 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4592 for c in s)
4593
4594
4595 def rot47(s):
4596 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4597
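# Illustrative examples; since the ROT47 alphabet has 94 characters,
# applying the shift of 47 twice is the identity:
#   >>> caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 2)
#   'cde'
#   >>> rot47(rot47('yt-dlp'))
#   'yt-dlp'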
4598
4599 def parse_m3u8_attributes(attrib):
4600 info = {}
4601 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4602 if val.startswith('"'):
4603 val = val[1:-1]
4604 info[key] = val
4605 return info
4606
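# Illustrative example (a made-up EXT-X-STREAM-INF attribute list); quoted
# values may contain commas and are unquoted in the result:
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}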
4607
4608 def urshift(val, n):
4609 return val >> n if val >= 0 else (val + 0x100000000) >> n
4610
4611
4612 # Based on png2str() written by @gdkchan and improved by @yokrysty
4613 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4614 def decode_png(png_data):
4615 # Reference: https://www.w3.org/TR/PNG/
4616 header = png_data[8:]
4617
4618 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4619 raise OSError('Not a valid PNG file.')
4620
4621 int_map = {1: '>B', 2: '>H', 4: '>I'}
4622 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4623
4624 chunks = []
4625
4626 while header:
4627 length = unpack_integer(header[:4])
4628 header = header[4:]
4629
4630 chunk_type = header[:4]
4631 header = header[4:]
4632
4633 chunk_data = header[:length]
4634 header = header[length:]
4635
4636 header = header[4:] # Skip CRC
4637
4638 chunks.append({
4639 'type': chunk_type,
4640 'length': length,
4641 'data': chunk_data
4642 })
4643
4644 ihdr = chunks[0]['data']
4645
4646 width = unpack_integer(ihdr[:4])
4647 height = unpack_integer(ihdr[4:8])
4648
4649 idat = b''
4650
4651 for chunk in chunks:
4652 if chunk['type'] == b'IDAT':
4653 idat += chunk['data']
4654
4655 if not idat:
4656 raise OSError('Unable to read PNG data.')
4657
4658 decompressed_data = bytearray(zlib.decompress(idat))
4659
4660 stride = width * 3
4661 pixels = []
4662
4663 def _get_pixel(idx):
4664 x = idx % stride
4665 y = idx // stride
4666 return pixels[y][x]
4667
4668 for y in range(height):
4669 basePos = y * (1 + stride)
4670 filter_type = decompressed_data[basePos]
4671
4672 current_row = []
4673
4674 pixels.append(current_row)
4675
4676 for x in range(stride):
4677 color = decompressed_data[1 + basePos + x]
4678 basex = y * stride + x
4679 left = 0
4680 up = 0
4681
4682 if x > 2:
4683 left = _get_pixel(basex - 3)
4684 if y > 0:
4685 up = _get_pixel(basex - stride)
4686
4687 if filter_type == 1: # Sub
4688 color = (color + left) & 0xff
4689 elif filter_type == 2: # Up
4690 color = (color + up) & 0xff
4691 elif filter_type == 3: # Average
4692 color = (color + ((left + up) >> 1)) & 0xff
4693 elif filter_type == 4: # Paeth
4694 a = left
4695 b = up
4696 c = 0
4697
4698 if x > 2 and y > 0:
4699 c = _get_pixel(basex - stride - 3)
4700
4701 p = a + b - c
4702
4703 pa = abs(p - a)
4704 pb = abs(p - b)
4705 pc = abs(p - c)
4706
4707 if pa <= pb and pa <= pc:
4708 color = (color + a) & 0xff
4709 elif pb <= pc:
4710 color = (color + b) & 0xff
4711 else:
4712 color = (color + c) & 0xff
4713
4714 current_row.append(color)
4715
4716 return width, height, pixels
4717
4718
4719 def write_xattr(path, key, value):
4720 # Windows: Write xattrs to NTFS Alternate Data Streams:
4721 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4722 if compat_os_name == 'nt':
4723 assert ':' not in key
4724 assert os.path.exists(path)
4725
4726 try:
4727 with open(f'{path}:{key}', 'wb') as f:
4728 f.write(value)
4729 except OSError as e:
4730 raise XAttrMetadataError(e.errno, e.strerror)
4731 return
4732
4733 # UNIX Method 1. Use xattrs/pyxattrs modules
4734 from .dependencies import xattr
4735
4736 setxattr = None
4737 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
4738 # Unicode arguments are not supported in pyxattr until version 0.5.0
4739 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4740 if version_tuple(xattr.__version__) >= (0, 5, 0):
4741 setxattr = xattr.set
4742 elif xattr:
4743 setxattr = xattr.setxattr
4744
4745 if setxattr:
4746 try:
4747 setxattr(path, key, value)
4748 except OSError as e:
4749 raise XAttrMetadataError(e.errno, e.strerror)
4750 return
4751
4752 # UNIX Method 2. Use setfattr/xattr executables
4753 exe = ('setfattr' if check_executable('setfattr', ['--version'])
4754 else 'xattr' if check_executable('xattr', ['-h']) else None)
4755 if not exe:
4756 raise XAttrUnavailableError(
4757 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
4758 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4759
4760 value = value.decode()
4761 try:
4762 p = Popen(
4763 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
4764 stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4765 except OSError as e:
4766 raise XAttrMetadataError(e.errno, e.strerror)
4767 stderr = p.communicate_or_kill()[1].decode('utf-8', 'replace')
4768 if p.returncode:
4769 raise XAttrMetadataError(p.returncode, stderr)
4770
4771
4772 def random_birthday(year_field, month_field, day_field):
4773 start_date = datetime.date(1950, 1, 1)
4774 end_date = datetime.date(1995, 12, 31)
4775 offset = random.randint(0, (end_date - start_date).days)
4776 random_date = start_date + datetime.timedelta(offset)
4777 return {
4778 year_field: str(random_date.year),
4779 month_field: str(random_date.month),
4780 day_field: str(random_date.day),
4781 }
4782
4783
4784 # Templates for internet shortcut files, which are plain text files.
4785 DOT_URL_LINK_TEMPLATE = '''\
4786 [InternetShortcut]
4787 URL=%(url)s
4788 '''
4789
4790 DOT_WEBLOC_LINK_TEMPLATE = '''\
4791 <?xml version="1.0" encoding="UTF-8"?>
4792 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4793 <plist version="1.0">
4794 <dict>
4795 \t<key>URL</key>
4796 \t<string>%(url)s</string>
4797 </dict>
4798 </plist>
4799 '''
4800
4801 DOT_DESKTOP_LINK_TEMPLATE = '''\
4802 [Desktop Entry]
4803 Encoding=UTF-8
4804 Name=%(filename)s
4805 Type=Link
4806 URL=%(url)s
4807 Icon=text-html
4808 '''
4809
4810 LINK_TEMPLATES = {
4811 'url': DOT_URL_LINK_TEMPLATE,
4812 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
4813 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
4814 }
4815
4816
4817 def iri_to_uri(iri):
4818 """
4819 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4820
4821 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes (using an underlying UTF-8 encoding) only those characters that are not already escaped, leaving existing escapes intact.
4822 """
4823
4824 iri_parts = compat_urllib_parse_urlparse(iri)
4825
4826 if '[' in iri_parts.netloc:
4827 raise ValueError('IPv6 URIs are not yet supported.')
4828 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4829
4830 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4831
4832 net_location = ''
4833 if iri_parts.username:
4834 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
4835 if iri_parts.password is not None:
4836 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
4837 net_location += '@'
4838
4839 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
4840 # The 'idna' encoding produces ASCII text.
4841 if iri_parts.port is not None and iri_parts.port != 80:
4842 net_location += ':' + str(iri_parts.port)
4843
4844 return urllib.parse.urlunparse(
4845 (iri_parts.scheme,
4846 net_location,
4847
4848 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
4849
4850 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4851 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
4852
4853 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4854 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
4855
4856 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
4857
4858 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
4859
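# Illustrative conversion (hypothetical URL): the hostname is punycoded while
# path and query are percent-encoded as UTF-8:
#   >>> iri_to_uri('http://bücher.example/naïve?q=café')
#   'http://xn--bcher-kva.example/na%C3%AFve?q=caf%C3%A9'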
4860
4861 def to_high_limit_path(path):
4862 if sys.platform in ['win32', 'cygwin']:
4863 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
4864 return '\\\\?\\' + os.path.abspath(path)
4865
4866 return path
4867
4868
4869 def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
4870 val = traverse_obj(obj, *variadic(field))
4871 if val in ignore:
4872 return default
4873 return template % (func(val) if func else val)
4874
4875
4876 def clean_podcast_url(url):
4877 return re.sub(r'''(?x)
4878 (?:
4879 (?:
4880 chtbl\.com/track|
4881 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4882 play\.podtrac\.com
4883 )/[^/]+|
4884 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4885 flex\.acast\.com|
4886 pd(?:
4887 cn\.co| # https://podcorn.com/analytics-prefix/
4888 st\.fm # https://podsights.com/docs/
4889 )/e
4890 )/''', '', url)
4891
4892
4893 _HEX_TABLE = '0123456789abcdef'
4894
4895
4896 def random_uuidv4():
4897 return re.sub(r'[xy]', lambda m: random.choice('89ab') if m.group(0) == 'y' else _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')  # the 'y' nibble carries the RFC 4122 variant bits
4898
4899
4900 def make_dir(path, to_screen=None):
4901 try:
4902 dn = os.path.dirname(path)
4903 if dn and not os.path.exists(dn):
4904 os.makedirs(dn)
4905 return True
4906 except OSError as err:
4907 if callable(to_screen):
4908 to_screen('unable to create directory ' + error_to_compat_str(err))
4909 return False
4910
4911
4912 def get_executable_path():
4913 from zipimport import zipimporter
4914 if hasattr(sys, 'frozen'): # Running from PyInstaller
4915 path = os.path.dirname(sys.executable)
4916 elif isinstance(__loader__, zipimporter): # Running from ZIP
4917 path = os.path.join(os.path.dirname(__file__), '../..')
4918 else:
4919 path = os.path.join(os.path.dirname(__file__), '..')
4920 return os.path.abspath(path)
4921
4922
4923 def load_plugins(name, suffix, namespace):
4924 classes = {}
4925 with contextlib.suppress(FileNotFoundError):
4926 plugins_spec = importlib.util.spec_from_file_location(
4927 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
4928 plugins = importlib.util.module_from_spec(plugins_spec)
4929 sys.modules[plugins_spec.name] = plugins
4930 plugins_spec.loader.exec_module(plugins)
4931 for name in dir(plugins):
4932 if name in namespace:
4933 continue
4934 if not name.endswith(suffix):
4935 continue
4936 klass = getattr(plugins, name)
4937 classes[name] = namespace[name] = klass
4938 return classes
4939
4940
4941 def traverse_obj(
4942 obj, *path_list, default=None, expected_type=None, get_all=True,
4943 casesense=True, is_user_input=False, traverse_string=False):
4944 ''' Traverse nested list/dict/tuple
4945 @param path_list A list of paths which are checked one by one.
4946 Each path is a list of keys where each key is a:
4947 - None: Do nothing
4948 - string: A dictionary key
4949 - int: An index into a list
4950 - tuple: A list of keys all of which will be traversed
4951 - Ellipsis: Fetch all values in the object
4952 - Function: Takes the key and value as arguments
4953 and returns whether the key matches or not
4954 @param default Default value to return
4955 @param expected_type Only accept final value of this type (Can also be any callable)
4956 @param get_all Return all the values obtained from a path or only the first one
4957 @param casesense Whether to consider dictionary keys as case sensitive
4958 @param is_user_input Whether the keys are generated from user input. If True,
4959 strings are converted to int/slice if necessary
4960 @param traverse_string Whether to traverse inside strings. If True, any
4961 non-compatible object will also be converted into a string
4962 # TODO: Write tests
4963 '''
4964 if not casesense:
4965 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
4966 path_list = (map(_lower, variadic(path)) for path in path_list)
4967
4968 def _traverse_obj(obj, path, _current_depth=0):
4969 nonlocal depth
4970 path = tuple(variadic(path))
4971 for i, key in enumerate(path):
4972 if None in (key, obj):
4973 return obj
4974 if isinstance(key, (list, tuple)):
4975 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
4976 key = ...
4977 if key is ...:
4978 obj = (obj.values() if isinstance(obj, dict)
4979 else obj if isinstance(obj, (list, tuple, LazyList))
4980 else str(obj) if traverse_string else [])
4981 _current_depth += 1
4982 depth = max(depth, _current_depth)
4983 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
4984 elif callable(key):
4985 if isinstance(obj, (list, tuple, LazyList)):
4986 obj = enumerate(obj)
4987 elif isinstance(obj, dict):
4988 obj = obj.items()
4989 else:
4990 if not traverse_string:
4991 return None
4992 obj = str(obj)
4993 _current_depth += 1
4994 depth = max(depth, _current_depth)
4995 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
4996 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
4997 obj = (obj.get(key) if casesense or (key in obj)
4998 else next((v for k, v in obj.items() if _lower(k) == key), None))
4999 else:
5000 if is_user_input:
5001 key = (int_or_none(key) if ':' not in key
5002 else slice(*map(int_or_none, key.split(':'))))
5003 if key == slice(None):
5004 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5005 if not isinstance(key, (int, slice)):
5006 return None
5007 if not isinstance(obj, (list, tuple, LazyList)):
5008 if not traverse_string:
5009 return None
5010 obj = str(obj)
5011 try:
5012 obj = obj[key]
5013 except IndexError:
5014 return None
5015 return obj
5016
5017 if isinstance(expected_type, type):
5018 type_test = lambda val: val if isinstance(val, expected_type) else None
5019 elif expected_type is not None:
5020 type_test = expected_type
5021 else:
5022 type_test = lambda val: val
5023
5024 for path in path_list:
5025 depth = 0
5026 val = _traverse_obj(obj, path)
5027 if val is not None:
5028 if depth:
5029 for _ in range(depth - 1):
5030 val = itertools.chain.from_iterable(v for v in val if v is not None)
5031 val = [v for v in map(type_test, val) if v is not None]
5032 if val:
5033 return val if get_all else val[0]
5034 else:
5035 val = type_test(val)
5036 if val is not None:
5037 return val
5038 return default
5039
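# Illustrative traversals (the data below is hypothetical):
#   >>> d = {'formats': [{'url': 'https://a.example/1'}, {'height': 720}]}
#   >>> traverse_obj(d, ('formats', 0, 'url'))
#   'https://a.example/1'
#   >>> traverse_obj(d, ('formats', ..., 'url'))   # Ellipsis branches over the list
#   ['https://a.example/1']
#   >>> traverse_obj(d, ('formats', 1, 'url'), default='none')
#   'none'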
5040
5041 def traverse_dict(dictn, keys, casesense=True):
5042 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5043 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5044 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5045
5046
5047 def get_first(obj, keys, **kwargs):
5048 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5049
5050
5051 def variadic(x, allowed_types=(str, bytes, dict)):
5052 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5053
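# Illustrative behaviour: strings (and the other `allowed_types`) are wrapped
# rather than iterated:
#   >>> variadic('abc')
#   ('abc',)
#   >>> variadic([1, 2])
#   [1, 2]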
5054
5055 def decode_base(value, digits):
5056 # Convert a base-x string to an integer, using `digits` as the digit table
5057 table = {char: index for index, char in enumerate(digits)}
5058 result = 0
5059 base = len(digits)
5060 for char in value:
5061 result *= base
5062 result += table[char]
5063 return result
5064
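# Illustrative example (the inverse of encode_base_n for the same table):
#   >>> decode_base('ff', '0123456789abcdef')
#   255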
5065
5066 def time_seconds(**kwargs):
5067 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5068 return t.timestamp()
5069
5070
5071 # Create a JSON Web Signature (JWS) with the HS256 algorithm;
5072 # the result is in JWS Compact Serialization format
5073 # Implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5074 # and JWS https://www.rfc-editor.org/rfc/rfc7515.html
5075 def jwt_encode_hs256(payload_data, key, headers={}):
5076 header_data = {
5077 'alg': 'HS256',
5078 'typ': 'JWT',
5079 }
5080 if headers:
5081 header_data.update(headers)
5082 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5083 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5084 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5085 signature_b64 = base64.b64encode(h.digest())
5086 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5087 return token
5088
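# Illustrative usage ('secret' is a placeholder key). Note the segments are
# standard base64 here rather than the base64url that RFC 7515 specifies:
#   >>> token = jwt_encode_hs256({'sub': 'example'}, 'secret')
#   >>> token.count(b'.')
#   2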
5089
5090 # Can be extended in the future to verify the signature and parse the header, returning the algorithm used if it's not HS256
5091 def jwt_decode_hs256(jwt):
5092 header_b64, payload_b64, signature_b64 = jwt.split('.')
5093 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64 + '=' * (-len(payload_b64) % 4)))  # JWT strips base64 padding; restore it before decoding
5094 return payload_data
5095
5096
5097 @functools.cache
5098 def supports_terminal_sequences(stream):
5099 if compat_os_name == 'nt':
5100 from .compat import WINDOWS_VT_MODE # Must be imported locally
5101 if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
5102 return False
5103 elif not os.getenv('TERM'):
5104 return False
5105 try:
5106 return stream.isatty()
5107 except BaseException:
5108 return False
5109
5110
5111 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5112
5113
5114 def remove_terminal_sequences(string):
5115 return _terminal_sequences_re.sub('', string)
5116
5117
5118 def number_of_digits(number):
5119 return len('%d' % number)
5120
5121
5122 def join_nonempty(*values, delim='-', from_dict=None):
5123 if from_dict is not None:
5124 values = map(from_dict.get, values)
5125 return delim.join(map(str, filter(None, values)))
5126
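# Illustrative examples; falsy values are dropped before joining:
#   >>> join_nonempty('mp4', None, 720)
#   'mp4-720'
#   >>> join_nonempty('width', 'height', from_dict={'width': 1280, 'height': 720})
#   '1280-720'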
5127
5128 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5129 """
5130 Find the largest format dimensions in terms of video width and, for each thumbnail:
5131 * Modify the URL: Match the width with the provided regex and replace with the former width
5132 * Update dimensions
5133
5134 This function is useful with video services that scale the provided thumbnails on demand
5135 """
5136 _keys = ('width', 'height')
5137 max_dimensions = max(
5138 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5139 default=(0, 0))
5140 if not max_dimensions[0]:
5141 return thumbnails
5142 return [
5143 merge_dicts(
5144 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5145 dict(zip(_keys, max_dimensions)), thumbnail)
5146 for thumbnail in thumbnails
5147 ]
5148
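# Illustrative example (hypothetical formats/thumbnails; the regex matches the
# width embedded in the thumbnail URL):
#   >>> scale_thumbnails_to_max_format_width(
#   ...     [{'width': 1280, 'height': 720}],
#   ...     [{'url': 'https://i.example/thumb_640.jpg'}], r'\d+(?=\.jpg)')
#   [{'url': 'https://i.example/thumb_1280.jpg', 'width': 1280, 'height': 720}]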
5149
5150 def parse_http_range(range):
5151 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5152 if not range:
5153 return None, None, None
5154 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5155 if not crg:
5156 return None, None, None
5157 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5158
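# Illustrative parses of the two header shapes:
#   >>> parse_http_range('bytes 0-499/1234')   # Content-Range
#   (0, 499, 1234)
#   >>> parse_http_range('bytes=500-')         # open-ended Range
#   (500, None, None)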
5159
5160 class Config:
5161 own_args = None
5162 parsed_args = None
5163 filename = None
5164 __initialized = False
5165
5166 def __init__(self, parser, label=None):
5167 self.parser, self.label = parser, label
5168 self._loaded_paths, self.configs = set(), []
5169
5170 def init(self, args=None, filename=None):
5171 assert not self.__initialized
5172 directory = ''
5173 if filename:
5174 location = os.path.realpath(filename)
5175 directory = os.path.dirname(location)
5176 if location in self._loaded_paths:
5177 return False
5178 self._loaded_paths.add(location)
5179
5180 self.own_args, self.__initialized = args, True
5181 opts, _ = self.parser.parse_known_args(args)
5182 self.parsed_args, self.filename = args, filename
5183
5184 for location in opts.config_locations or []:
5185 location = os.path.join(directory, expand_path(location))
5186 if os.path.isdir(location):
5187 location = os.path.join(location, 'yt-dlp.conf')
5188 if not os.path.exists(location):
5189 self.parser.error(f'config location {location} does not exist')
5190 self.append_config(self.read_file(location), location)
5191 return True
5192
5193 def __str__(self):
5194 label = join_nonempty(
5195 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5196 delim=' ')
5197 return join_nonempty(
5198 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5199 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5200 delim='\n')
5201
5202 @staticmethod
5203 def read_file(filename, default=[]):
5204 try:
5205 optionf = open(filename)
5206 except OSError:
5207 return default # silently skip if file is not present
5208 try:
5209 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5210 contents = optionf.read()
5211 res = shlex.split(contents, comments=True)
5212 finally:
5213 optionf.close()
5214 return res
5215
5216 @staticmethod
5217 def hide_login_info(opts):
5218 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5219 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5220
5221 def _scrub_eq(o):
5222 m = eqre.match(o)
5223 if m:
5224 return m.group('key') + '=PRIVATE'
5225 else:
5226 return o
5227
5228 opts = list(map(_scrub_eq, opts))
5229 for idx, opt in enumerate(opts):
5230 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5231 opts[idx + 1] = 'PRIVATE'
5232 return opts
5233
5234 def append_config(self, *args, label=None):
5235 config = type(self)(self.parser, label)
5236 config._loaded_paths = self._loaded_paths
5237 if config.init(*args):
5238 self.configs.append(config)
5239
5240 @property
5241 def all_args(self):
5242 for config in reversed(self.configs):
5243 yield from config.all_args
5244 yield from self.parsed_args or []
5245
5246 def parse_known_args(self, **kwargs):
5247 return self.parser.parse_known_args(self.all_args, **kwargs)
5248
5249 def parse_args(self):
5250 return self.parser.parse_args(self.all_args)
5251
5252
5253 class WebSocketsWrapper:
5254 """Wraps websockets module to use in non-async scopes"""
5255 pool = None
5256
5257 def __init__(self, url, headers=None, connect=True):
5258 self.loop = asyncio.new_event_loop()
5259 # XXX: "loop" is deprecated
5260 self.conn = websockets.connect(
5261 url, extra_headers=headers, ping_interval=None,
5262 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5263 if connect:
5264 self.__enter__()
5265 atexit.register(self.__exit__, None, None, None)
5266
5267 def __enter__(self):
5268 if not self.pool:
5269 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5270 return self
5271
5272 def send(self, *args):
5273 self.run_with_loop(self.pool.send(*args), self.loop)
5274
5275 def recv(self, *args):
5276 return self.run_with_loop(self.pool.recv(*args), self.loop)
5277
5278 def __exit__(self, type, value, traceback):
5279 try:
5280 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5281 finally:
5282 self._cancel_all_tasks(self.loop)  # cancel pending tasks while the loop is still open
5283 self.loop.close()
5284
5285 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5286 # for contributors: if any new library that uses asyncio needs to run in non-async code, move these functions out of this class
5287 @staticmethod
5288 def run_with_loop(main, loop):
5289 if not asyncio.iscoroutine(main):
5290 raise ValueError(f'a coroutine was expected, got {main!r}')
5291
5292 try:
5293 return loop.run_until_complete(main)
5294 finally:
5295 loop.run_until_complete(loop.shutdown_asyncgens())
5296 if hasattr(loop, 'shutdown_default_executor'):
5297 loop.run_until_complete(loop.shutdown_default_executor())
5298
5299 @staticmethod
5300 def _cancel_all_tasks(loop):
5301 to_cancel = asyncio.all_tasks(loop)
5302
5303 if not to_cancel:
5304 return
5305
5306 for task in to_cancel:
5307 task.cancel()
5308
5309 # XXX: "loop" is removed in python 3.10+
5310 loop.run_until_complete(
5311 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5312
5313 for task in to_cancel:
5314 if task.cancelled():
5315 continue
5316 if task.exception() is not None:
5317 loop.call_exception_handler({
5318 'message': 'unhandled exception during asyncio.run() shutdown',
5319 'exception': task.exception(),
5320 'task': task,
5321 })
5322
5323
5324 def merge_headers(*dicts):
5325 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5326 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5327
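# Illustrative merge (hypothetical headers): keys are title-cased, so
# differently-cased duplicates collapse and the last dict wins:
#   >>> merge_headers({'user-agent': 'UA1', 'Accept': '*/*'}, {'User-Agent': 'UA2'})
#   {'User-Agent': 'UA2', 'Accept': '*/*'}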
5328
5329 class classproperty:
5330 def __init__(self, f):
5331 functools.update_wrapper(self, f)
5332 self.f = f
5333
5334 def __get__(self, _, cls):
5335 return self.f(cls)
5336
5337
5338 class Namespace:
5339 """Immutable namespace"""
5340
5341 def __init__(self, **kwargs):
5342 self._dict = kwargs
5343
5344 def __getattr__(self, attr):
5345 return self._dict[attr]
5346
5347 def __contains__(self, item):
5348 return item in self._dict.values()
5349
5350 def __iter__(self):
5351 return iter(self._dict.items())
5352
5353 def __repr__(self):
5354 return f'{type(self).__name__}({", ".join(f"{k}={v}" for k, v in self)})'
5355
5356
5357 # Deprecated
5358 has_certifi = bool(certifi)
5359 has_websockets = bool(websockets)