]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
[cleanup] Misc fixes
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 import atexit
3 import base64
4 import binascii
5 import calendar
6 import codecs
7 import collections
8 import contextlib
9 import ctypes
10 import datetime
11 import email.header
12 import email.utils
13 import errno
14 import gzip
15 import hashlib
16 import hmac
17 import importlib.util
18 import io
19 import itertools
20 import json
21 import locale
22 import math
23 import mimetypes
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import shlex
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import urllib.parse
38 import xml.etree.ElementTree
39 import zlib
40
41 from .compat import asyncio, functools # isort: split
42 from .compat import (
43 compat_chr,
44 compat_cookiejar,
45 compat_etree_fromstring,
46 compat_expanduser,
47 compat_html_entities,
48 compat_html_entities_html5,
49 compat_HTMLParseError,
50 compat_HTMLParser,
51 compat_http_client,
52 compat_HTTPError,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse_unquote_plus,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_request,
64 compat_urlparse,
65 )
66 from .dependencies import brotli, certifi, websockets
67 from .socks import ProxyType, sockssocket
68
69
def register_socks_protocols():
    """Make urllib's URL parsing treat SOCKS proxy schemes as netloc-aware.

    urlsplit() only parses the netloc part for schemes listed in
    urlparse.uses_netloc (see https://bugs.python.org/issue7904), so the
    SOCKS schemes must be registered there explicitly.
    """
    missing = (scheme for scheme in ('socks', 'socks4', 'socks4a', 'socks5')
               if scheme not in compat_urlparse.uses_netloc)
    compat_urlparse.uses_netloc.extend(missing)
77
78
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))  # the type of compiled regex objects (re.Pattern)
81
82
def random_user_agent():
    """Return a plausible Chrome-on-Windows User-Agent string with a randomly
    chosen Chrome version, to avoid all requests carrying an identical UA."""
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Real Chrome release versions substituted into the template above
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
126
127
# Content-Encoding values advertised via the Accept-Encoding request header
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    # Brotli is only advertised when the optional dependency is available
    SUPPORTED_ENCODINGS.append('br')

# Default HTTP headers sent with every request (individual requests may override)
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}
140
141
# Alternative User-Agent strings that callers can opt into by name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
145
146
# Sentinel meaning "no default was supplied" (lets None be a valid default value)
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code, used when parsing free-form dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
159
# File extensions recognised as media container/codec files
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to its ASCII transliteration)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
179
# strptime() formats tried (in order) when parsing free-form date strings
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Variant preferring day-first (European-style) ambiguous dates
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Variant preferring month-first (US-style) ambiguous dates
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-obfuscated JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks (JSON-LD metadata)
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

# A (possibly fractional) unsigned decimal number
NUMBER_RE = r'\d+(?:\.\d+)?'
248
249
@functools.cache
def preferredencoding():
    """Best guess at the system's preferred text encoding.

    Uses locale.getpreferredencoding(), but falls back to UTF-8 when the
    reported encoding is unusable (i.e. cannot encode plain ASCII text).
    """
    encoding = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        'TEST'.encode(candidate)
    except Exception:
        pass  # broken/unknown locale: keep the UTF-8 fallback
    else:
        encoding = candidate
    return encoding
264
265
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    # Write to a temporary file in the destination directory first, then
    # rename it over the target so readers never see a half-written file
    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            # NamedTemporaryFile is created with restrictive 0o600 permissions;
            # re-apply the usual umask-derived mode so the result matches open()
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise
290
291
def find_xpath_attr(node, xpath, key, val=None):
    """Find the first node matching xpath[@key] (or xpath[@key='val'] if val is given)."""
    assert re.match(r'^[a-zA-Z_-]+$', key)
    predicate = '[@%s]' % key if val is None else f"[@{key}='{val}']"
    return node.find(xpath + predicate)
297
# Historical note: on very old Python versions the xml.etree.ElementTree.Element
# methods did not support the namespace parameter, hence the helper below
300
301
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' form using ns_map."""
    def expand(component):
        parts = component.split(':')
        if len(parts) == 1:
            return parts[0]  # no namespace prefix on this step
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(component) for component in path.split('/'))
312
313
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a single expression, or an
    iterable of expressions tried in order).

    Returns `default` when given and nothing matches; raises ExtractorError
    when `fatal` and no default is given; otherwise returns None.
    """
    candidates = [xpath] if isinstance(xpath, (str, compat_str)) else xpath
    n = None
    for xp in candidates:
        n = node.find(xp)
        if n is not None:
            break
    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
335
336
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's .text."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    # Element matched but has no text: apply the same default/fatal policy
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
350
351
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the first node matching xpath[@key],
    honouring the usual default/fatal policy when nothing matches."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % (f'{xpath}[@{key}]' if name is None else name))
    return None
363
364
def get_element_by_id(id, html, **kwargs):
    """Content of the first tag carrying the given id attribute, or None."""
    return get_element_by_attribute('id', id, html, **kwargs)
368
369
def get_element_html_by_id(id, html, **kwargs):
    """Whole HTML of the first tag carrying the given id attribute, or None."""
    return get_element_html_by_attribute('id', id, html, **kwargs)
373
374
def get_element_by_class(class_name, html):
    """Content of the first tag whose class attribute contains class_name, or None."""
    return next(iter(get_elements_by_class(class_name, html) or []), None)
379
380
def get_element_html_by_class(class_name, html):
    """Whole HTML of the first tag whose class attribute contains class_name, or None."""
    return next(iter(get_elements_html_by_class(class_name, html) or []), None)
385
386
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Content of the first tag with the given attribute/value, or None."""
    return next(iter(get_elements_by_attribute(attribute, value, html, **kwargs) or []), None)
390
391
def get_element_html_by_attribute(attribute, value, html, **kwargs):
    """Whole HTML of the first tag with the given attribute/value, or None.

    Note: the kwargs parameter was renamed from the inconsistent `kargs`
    to match every sibling helper; the name is internal, so callers are
    unaffected.
    """
    retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None
395
396
def get_elements_by_class(class_name, html, **kargs):
    """Content of every tag whose class attribute contains class_name.

    NOTE(review): **kargs is accepted but not forwarded — confirm no caller
    relies on passing options through here.
    """
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
402
403
def get_elements_html_by_class(class_name, html):
    """Whole HTML of every tag whose class attribute contains class_name."""
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_html_by_attribute('class', class_value_re, html, escape_value=False)
409
410
def get_elements_by_attribute(*args, **kwargs):
    """Text content of every tag carrying the specified attribute/value."""
    return [text for text, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
414
415
def get_elements_html_by_attribute(*args, **kwargs):
    """Whole HTML of every tag carrying the specified attribute/value."""
    return [element for _, element in get_elements_text_and_html_by_attribute(*args, **kwargs)]
419
420
def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document

    Despite the docstring wording, this is a generator that yields a
    (content, whole_element) pair for EVERY matching tag.
    """

    # Only allow the attribute value to appear unquoted in the document when
    # it contains no character that would require quoting
    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    # Verbose-mode regex matching the opening tag up to and including the
    # target attribute; (?-x:...) embeds the value with verbose mode off so
    # its whitespace stays significant
    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            # strip one level of surrounding quotes from the content, then
            # decode HTML entities
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )
444
445
class HTMLBreakOnClosingTagParser(compat_HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        # control-flow exception: signals "outermost tag just closed"
        pass

    def __init__(self):
        # stack of currently-open tag names
        self.tagstack = collections.deque()
        compat_HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        # pop until the matching opening tag is found (tolerates unclosed
        # inner tags, as real-world HTML often has them)
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            # the first tag we saw has just been closed
            raise self.HTMLBreakOnClosingTagException()
486
487
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its' content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        # str.index, but raising the supplied parse error instead of ValueError
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        # feed just the opening tag so the parser's tag stack is primed
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        # feed up to each candidate closing tag until the parser signals that
        # the outermost tag actually closed (skips nested same-name tags)
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')
521
522
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser that records the attributes of the first (single)
    element fed to it in the `attrs` dict."""

    def __init__(self):
        self.attrs = {}
        super().__init__()

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
532
533
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser that collects the attribute dicts of all top-level <li>
    elements into `items` (nested lists are ignored via depth tracking)."""

    def __init__(self):
        super().__init__()
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        # record only <li> tags at the outermost nesting level
        if self._level == 0 and tag == 'li':
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1
549
550
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element string into a dict.

    For example, given
        <el a="foo" B="bar" c="&98;az" d=boz empty= noval entity="&amp;" sq='"' dq="'">
    this returns
        {'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
         'empty': '', 'noval': None, 'entity': '&', 'sq': '"', 'dq': '\''}
    """
    attr_parser = HTMLAttributeParser()
    # tolerate malformed markup: return whatever was gathered before the error
    with contextlib.suppress(compat_HTMLParseError):
        attr_parser.feed(html_element)
        attr_parser.close()
    return attr_parser.attrs
570
571
def parse_list(webpage):
    """Parse a series of HTML <li> elements and return their attribute dicts."""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    list_parser.close()
    return list_parser.items
579
580
def clean_html(html):
    """Clean an HTML snippet into a readable plain-text string."""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # collapse whitespace, turn <br> and paragraph breaks into newlines
    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # strip remaining tags, then decode entities
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
595
596
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            # stdout may be in text mode on Windows; switch it to binary
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    # attempt 0: filename as given; attempt 1: sanitized filename
    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                # locking failed or is unsupported: fall back to a plain open
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            # permission errors won't be fixed by renaming; give up immediately
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                # sanitizing changed nothing, so retrying cannot help
                raise
631
632
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a Unix timestamp (None on failure)."""
    parsed = email.utils.parsedate_tz(timestr)
    return None if parsed is None else email.utils.mktime_tz(parsed)
640
641
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted  Use a stricter subset of allowed characters
    @param is_id       Whether this is an ID that should be kept unchanged if possible.
                       If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        # Replacement characters are prefixed with NUL ('\0') markers so that
        # runs of substitutes can be collapsed and stripped afterwards
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            # '?' and control characters are always removed outright
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'  # drop the markers; never return ''

    if not is_id:
        # legacy rules: collapse underscores and tidy leading/trailing chars
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
689
690
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows.

    On other platforms this is a no-op unless `force` is set, in which case
    Windows path rules are applied anyway.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        # replace characters invalid in Windows path components (and a
        # trailing dot/space, which Windows also rejects) with '#'
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        # preserve absolute paths when forcing on non-Windows
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
712
713
def sanitize_url(url):
    """Prepend 'http:' to protocol-relative URLs and fix common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return None
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
732
733
def extract_basic_auth(url):
    """Split userinfo out of `url`.

    Returns (url_without_credentials, authorization_header_value_or_None).
    """
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    # rebuild the netloc without the user:password@ prefix
    netloc = parts.hostname if parts.port is None else '%s:%d' % (parts.hostname, parts.port)
    clean_url = urllib.parse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    auth_payload = base64.b64encode(credentials.encode())
    return clean_url, f'Basic {auth_payload.decode()}'
744
745
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request with the URL sanitized/escaped and any embedded
    userinfo converted into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # headers may arrive positionally (data, headers) or as a keyword
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
752
753
def expand_path(s):
    """Expand environment variables and the user home marker (~) in s."""
    expanded = compat_expanduser(s)
    return os.path.expandvars(expanded)
757
758
def orderedSet(iterable):
    """Return a list of the items of `iterable` with duplicates removed,
    keeping the first occurrence of each.

    Uses equality (not hashing) for membership, so unhashable items work too.
    """
    unique = []
    for item in iterable:
        if item not in unique:
            unique.append(item)
    return unique
766
767
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: decimal (#123) or hexadecimal (#x7B)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base, numstr = 16, '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
795
796
def unescapeHTML(s):
    """Replace HTML entities in s with their characters (None passes through)."""
    if s is None:
        return None
    assert isinstance(s, str)
    return re.sub(r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
804
805
def escapeHTML(text):
    """Escape &, <, >, and both quote characters for safe HTML embedding."""
    # '&' must be replaced first so already-produced entities are not re-escaped
    for char, entity in (
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
            ('"', '&quot;'),
            ("'", '&#39;')):
        text = text.replace(char, entity)
    return text
815
816
def process_communicate_or_kill(p, *args, **kwargs):
    """Deprecated alias of Popen.communicate_or_kill, kept for backward compatibility."""
    write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)
821
822
class Popen(subprocess.Popen):
    """subprocess.Popen that suppresses console windows on Windows and adds
    communicate_or_kill()."""
    if sys.platform == 'win32':
        # prevent a console window from flashing up for each subprocess
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        """communicate(), but kill and reap the process if interrupted."""
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill()
            self.wait()
            raise
840
841
def get_subprocess_encoding():
    """Encoding used when exchanging text with subprocesses on this platform."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
852
853
def encodeFilename(s, for_subprocess=False):
    """Legacy no-op: filenames are always str on Python 3.

    @param for_subprocess  unused, retained for backward compatibility
    """
    assert isinstance(s, str)
    return s
857
858
def decodeFilename(b, for_subprocess=False):
    """Legacy no-op counterpart of encodeFilename; kept for API compatibility."""
    return b
861
862
def encodeArgument(s):
    """Return s as str, decoding legacy byte-string arguments as ASCII."""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    if not isinstance(s, str):
        return s.decode('ascii')
    return s
868
869
def decodeArgument(b):
    """Legacy no-op counterpart of encodeArgument; kept for API compatibility."""
    return b
872
873
def decodeOption(optval):
    """Decode a command-line option value to str (None passes through)."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())
    assert isinstance(optval, compat_str)
    return optval
882
883
# Named components of a duration split out of a millisecond count
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into (hours, minutes, seconds, milliseconds)."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)


def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]MM<delim>SS (or bare seconds
    when under a minute), optionally appending '.mmm' milliseconds."""
    t = timetuple_from_msec(secs * 1000)
    if t.hours:
        base = '%d%s%02d%s%02d' % (t.hours, delim, t.minutes, delim, t.seconds)
    elif t.minutes:
        base = '%d%s%02d' % (t.minutes, delim, t.seconds)
    else:
        base = '%d' % t.seconds
    if not msec:
        return base
    return '%s.%03d' % (base, t.milliseconds)
903
904
def _ssl_load_windows_store_certs(ssl_context, storename):
    """Load trusted server-auth certificates from a Windows certificate store
    ('CA' or 'ROOT') into ssl_context, skipping any that fail to parse."""
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        # store not readable: silently skip, matching CPython's behaviour
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)
916
917
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose SSLContext is configured from the
    user options in `params` (certificate checking, legacy-server workaround,
    CA bundle selection and client certificates)."""
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        # has_certifi is defined elsewhere in this module from the optional
        # certifi dependency
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        try:
            context.load_default_certs()
        # Work around the issue in load_default_certs when there are bad certificates. See:
        # https://github.com/yt-dlp/yt-dlp/issues/1060,
        # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
959
960
def bug_reports_message(before=';'):
    """Standard 'please report this issue' blurb, capitalised when it begins a sentence."""
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
           'filling out the appropriate issue template. '
           'Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # starting a new sentence: capitalise the first word
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
971
972
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # Subclasses may set a class-level default message
    msg = None

    def __init__(self, msg=None):
        # precedence: explicit argument > class-level default > class name
        if msg is None:
            msg = self.msg if self.msg is not None else type(self).__name__
        self.msg = msg
        super().__init__(self.msg)
983
984
# Exceptions that indicate a (possibly transient) network problem rather than a bug
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # present on all supported Python versions; checked defensively
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
989
990
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # Errors raised while handling a network exception are always expected
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)  # message without the [ie]/video_id/cause decoration
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie  # name of the extractor that raised the error, if any
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        """Return the formatted traceback (plus the cause's traceback), or None."""
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None
1021
1022
class UnsupportedError(ExtractorError):
    """Raised when no extractor is able to handle the given URL."""

    def __init__(self, url):
        self.url = url
        super().__init__('Unsupported URL: %s' % url, expected=True)
1028
1029
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    # No extra behavior; exists so callers can catch this case specifically
    pass
1033
1034
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Geo-restriction is an expected condition, never a bug
        self.countries = countries
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
1046
1047
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super().__init__(msg)
1060
1061
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'  # default message used by YoutubeDLError.__init__
1069
1070
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Include the offending filename in the message. The previous
            # f-string had no placeholder, so the filename was silently dropped
            # (cf. UnavailableVideoError, which appends the detail the same way).
            self.msg += f': {filename}'
        super().__init__(self.msg)
1083
1084
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    # Message defaults to the class name via YoutubeDLError.__init__
1091
1092
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'  # default message; subclasses override
1096
1097
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1101
1102
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1106
1107
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1111
1112
class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        # `expected` mirrors ExtractorError's flag: True when not a bug
        self.expected = expected
        super().__init__(msg)
1119
1120
class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        # Throttling is treated as a re-extraction trigger, not an expected error
        super().__init__(self.msg, expected=False)
1127
1128
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        # Append the underlying error detail when one is given
        if err is not None:
            self.msg = f'{self.msg}: {err}'
        super().__init__(self.msg)
1141
1142
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are in bytes
        self.downloaded = downloaded
        self.expected = expected
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1156
1157
class XAttrMetadataError(YoutubeDLError):
    """Error while reading/writing extended file attributes, with a classified reason"""

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Classify code/message into a machine-readable reason
        if self.code in (errno.ENOSPC, errno.EDQUOT) or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg:
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
1172
1173
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when no xattr implementation can be used
    # on this system - confirm at the call sites (not visible in this chunk)
    pass
1176
1177
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate *http_class*, optionally binding it to the configured source address.

    ydl_handler: handler whose `_params` dict may carry 'source_address'
    http_class:  http.client connection class to instantiate
    is_https:    unused here; kept so callers can build a uniform partial()
    """
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only addresses whose family matches the source address
            # (a '.' in the source address means IPv4, otherwise IPv6)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # Re-raise the last connection error, if any attempt was made
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc
1223
1224
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, any 'Accept-Encoding' header (case-insensitive)
    is also removed so the request is made without compression. The input
    mapping is never mutated; a filtered copy is returned instead.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = {k: v for k, v in headers.items() if k.lower() != 'accept-encoding'}
    del filtered['Youtubedl-no-compression']
    return filtered
1233
1234
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params  # YoutubeDL params dict (provides 'http_headers')

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # Internal marker header: route this request through a SOCKS proxy
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            # First try a raw deflate stream (no zlib header)
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Fall back to a zlib-wrapped stream
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        # Transparently decompress the body and drop the Content-encoding
        # header so downstream consumers see plain content
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with progressively more bytes trimmed from the end
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
1363
1364
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through *socks_proxy*.

    socks_proxy is a URL such as 'socks5://user:pass@host:port'.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    # NOTE(review): an unrecognized scheme falls through, leaving `socks_type`
    # unbound (NameError below) - presumably callers only pass socks* schemes;
    # confirm at the call sites

    def unquote_if_non_empty(s):
        # Credentials are percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the raw SOCKS socket with TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
1406
1407
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS counterpart of YoutubeDLHandler: opens HTTPS connections with the
    configured source address, optional SOCKS proxying and a custom connection
    class."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        # Internal marker header: route this request through a SOCKS proxy
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            # Surface a legacy-TLS handshake failure with an actionable hint
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise
1436
1437
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    A MozillaCookieJar that also accepts file-like objects, tolerates the
    '#HttpOnly_' prefix and handles session cookies stored with `expires=0`.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # One tab-separated line of a Netscape cookies.txt file
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        # Accept os.PathLike in addition to plain str/bytes filenames
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """Yield a file object for *file*, which may be a path or an open file"""
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            # Already a file-like object; clear it before writing
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            # Skip cookies that should not be persisted
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the '#HttpOnly_' prefix so the line parses as a normal entry
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # A leading '[', '{' or '"' suggests the user exported JSON
                    if f'{line.strip()} '[0] in '[{"':
                        raise compat_cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
1570
1571
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the standard HTTP cookie handling to HTTPS too"""

    def __init__(self, cookiejar=None):
        super().__init__(cookiejar)

    def http_response(self, request, response):
        return super().http_response(request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
1581
1582
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Reuse the stock 302 handler for all other redirect status codes
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}

        # A 303 must either use GET or HEAD for subsequent request
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
        if code == 303 and m != 'HEAD':
            m = 'GET'
        # 301 and 302 redirects are commonly turned into a GET from a POST
        # for subsequent requests by browsers, so we'll do the same.
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
        if code in (301, 302) and m == 'POST':
            m = 'GET'

        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True, method=m)
1643
1644
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timezone, date_str): *timezone* is a datetime.timedelta (zero when
    no offset is present) and *date_str* has the designator removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
            (?P<sign>\+|-)                                       # +/-
            (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})           # hh[:]mm
            $)
        ''', date_str)
    if not m:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # Bare 'Z' designator: UTC
        return datetime.timedelta(), date_str
    sign = 1 if m.group('sign') == '+' else -1
    return datetime.timedelta(
        hours=sign * int(m.group('hours')),
        minutes=sign * int(m.group('minutes'))), date_str
1669
1670
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # Drop fractional seconds; strptime below does not expect them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    # Returns None implicitly when the date does not match the format
    with contextlib.suppress(ValueError):
        dt = datetime.datetime.strptime(date_str, f'%Y-%m-%d{delimiter}%H:%M:%S') - timezone
        return calendar.timegm(dt.timetuple())
1686
1687
def date_formats(day_first=True):
    """Return the list of supported date formats, preferring DD/MM or MM/DD order"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1690
1691
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None
    upload_date = None
    # Replace commas, then strip AM/PM markers and timezone designators
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str.replace(',', ' '))
    _, date_str = extract_timezone(date_str)

    # NB: deliberately no `break` - when several formats match,
    # the last successful one wins
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return compat_str(upload_date)
1714
1715
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for *date_str*, or None if it cannot be parsed.

    day_first controls whether ambiguous dates are read as DD/MM or MM/DD.
    """
    if date_str is None:
        return None

    # Commas and pipes never carry meaning in a timestamp
    date_str = re.sub(r'[,|]', '', date_str)

    # Record the 12-hour offset *before* the AM/PM marker is stripped below
    # NOTE(review): this adds 12h even when the hour is already 12 ("12:30 PM");
    # whether any format in date_formats() hits that case - confirm
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # First matching format wins (unlike unified_strdate, this returns early)
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
    # Fall back to RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
1745
1746
1747 def determine_ext(url, default_ext='unknown_video'):
1748 if url is None or '.' not in url:
1749 return default_ext
1750 guess = url.partition('?')[0].rpartition('.')[2]
1751 if re.match(r'^[A-Za-z0-9]+$', guess):
1752 return guess
1753 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1754 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1755 return guess.rstrip('/')
1756 else:
1757 return default_ext
1758
1759
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename for the given language and subtitle format"""
    sub_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, sub_ext, expected_real_ext)
1762
1763
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.utcnow(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is None:
        # Plain DATE with no offset suffix
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # Recursively resolve the base date, then apply the signed offset
    start_time = datetime_from_str(match.group('start'), precision, format)
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    if unit in ('month', 'year'):
        # timedelta cannot represent months/years; use calendar arithmetic
        new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        new_date = start_time + datetime.timedelta(**{unit + 's': amount})
    return datetime_round(new_date, unit) if auto_precision else new_date
1804
1805
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if not re.fullmatch(allowed, date_str):
            raise ValueError(f'Invalid date format "{date_str}"')
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
1816
1817
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the target month's length (Jan 31 + 1 month -> Feb 28/29).
    """
    total_months = dt.month - 1 + months
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
1825
1826
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest multiple of the unit
    rounded = (timestamp + seconds_per_unit / 2) // seconds_per_unit * seconds_per_unit
    return datetime.datetime.utcfromtimestamp(rounded)
1843
1844
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that is not exactly 8 digits is passed through untouched
    return '-'.join(match.groups()) if match else date_str
1853
1854
class DateRange:
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the minimum/maximum representable date
        self.start = date_from_str(start, strict=True) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end, strict=True) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return f'{self.start.isoformat()} - {self.end.isoformat()}'
1884
1885
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Defensive: decode if the platform module ever hands back bytes
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
1894
1895
@functools.cache
def get_windows_version():
    ''' Get Windows version. returns () if it's not running on Windows '''
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
1903
1904
def write_string(s, out=None, encoding=None):
    """Write string *s* to *out* (default: sys.stderr), encoding when needed.

    Binary streams (mode contains 'b') and text streams exposing a `.buffer`
    get the string encoded (errors ignored); other streams receive it as-is.
    """
    assert isinstance(s, str)
    out = out or sys.stderr

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        # NOTE(review): prepends a space before line breaks on Windows terminals
        # supporting ANSI sequences - presumably a console rendering workaround;
        # confirm the underlying issue
        s = re.sub(r'([\r\n]+)', r' \1', s)

    enc, buffer = None, out
    if 'b' in getattr(out, 'mode', ''):
        # Binary stream: must encode before writing
        enc = encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: write encoded bytes there
        buffer = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()

    buffer.write(s.encode(enc, 'ignore') if enc else s)
    out.flush()
1921
1922
def bytes_to_intlist(bs):
    """Convert a bytes-like (or str) sequence into a list of integer values."""
    if not bs:
        return []
    if isinstance(bs[0], int):
        # bytes/bytearray already yield ints when indexed
        return list(bs)
    # str or similar: map each character to its code point
    return [ord(ch) for ch in bs]
1930
1931
def intlist_to_bytes(xs):
    """Pack a sequence of integers (each 0-255) into a bytes object."""
    if not xs:
        return b''
    # The builtin bytes() constructor accepts any iterable of ints in
    # range(256); it replaces the former struct.pack('%dB' % len(xs), *xs)
    return bytes(xs)
1936
1937
class LockingUnsupportedError(OSError):
    """Raised when the platform provides no usable file-locking primitive."""

    msg = 'File locking is not supported'

    def __init__(self):
        super().__init__(self.msg)
1943
1944
# Cross-platform file locking.
# Defines _lock_file(f, exclusive, block) and _unlock_file(f):
#  - on Windows via kernel32 LockFileEx/UnlockFileEx,
#  - elsewhere via fcntl.flock (falling back to lockf),
#  - raising LockingUnsupportedError where neither exists.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Mirrors the Win32 OVERLAPPED struct; only Offset/OffsetHigh/hEvent are set
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte-range length covering the whole file (low and high DWORD halves)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        """Acquire a Win32 byte-range lock over the entire file."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # keep the OVERLAPPED alive as long as the file object holds the lock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        # dwFlags: 0x2 = LOCKFILE_EXCLUSIVE_LOCK, 0x1 = LOCKFILE_FAIL_IMMEDIATELY
        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # BlockingIOError keeps the failure type consistent with the
            # fcntl.flock behaviour on the POSIX branch below
            raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the byte-range lock taken by _lock_file."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            """flock() the whole file; fall back to lockf() where flock fails."""
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                # lock is held elsewhere and LOCK_NB was requested
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            """Release the lock using whichever primitive is available."""
            try:
                fcntl.flock(f, fcntl.LOCK_UN)
            except OSError:
                fcntl.lockf(f, fcntl.LOCK_UN)

    except ImportError:

        def _lock_file(f, exclusive, block):
            # no locking primitive on this platform
            raise LockingUnsupportedError()

        def _unlock_file(f):
            # no locking primitive on this platform
            raise LockingUnsupportedError()
2028
2029
class locked_file:
    """File wrapper that holds an advisory lock (via _lock_file) while open.

    Supports plain 'r'/'rb'/'a'/'ab'/'w'/'wb' modes; 'r' takes a shared lock,
    all other modes an exclusive one. Usable as a context manager or through
    the open()/close() aliases.
    """

    locked = False  # whether the lock is currently held

    def __init__(self, filename, mode, block=True, encoding=None):
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        # Build the os.open() flag word; platform-specific flags default to 0
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)

    def __enter__(self):
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            # Truncate only after the lock is held, so other processes never
            # observe a half-truncated file
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    raise
        return self

    def unlock(self):
        # Idempotent: only the first call actually releases the lock
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()

    # Aliases so the object can be used without a with-statement
    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        # Delegate everything else (read, write, ...) to the underlying file
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
2093
2094
@functools.cache
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' if unset."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
2099
2100
def shell_quote(args):
    """Return the given arguments joined into one shell-escaped string."""
    encoding = get_filesystem_encoding()

    def _as_text(arg):
        # Filenames may arrive as bytes (encoded with 'encodeFilename')
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
2110
2111
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL (existing wins)
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    smuggled = compat_urllib_parse_urlencode({'__youtubedl_smuggle': json.dumps(data)})
    return f'{url}#{smuggled}'
2120
2121
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url() into (url, data);
    returns (smug_url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    return url, json.loads(compat_parse_qs(sdata)['__youtubedl_smuggle'][0])
2129
2130
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """Format a number with decimal suffixes (k, M, ...; Ki, Mi, ... when
    factor=1024). Returns None for missing or negative input."""
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    suffixes = 'kMGTPEZY'
    exponent = min(int(math.log(num, factor)), len(suffixes)) if num else 0
    unit = ('', *suffixes)[exponent]
    if factor == 1024 and unit:
        # binary units: k -> Ki, M -> Mi, ...
        unit = 'Ki' if unit == 'k' else f'{unit}i'
    return fmt % (num / factor ** exponent, unit)
2143
2144
def format_bytes(bytes):
    """Human-readable binary byte count like '1.44MiB'; 'N/A' when unknown."""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted if formatted else 'N/A'
2147
2148
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number> <unit>' from s using unit_table;
    returns the value in base units as int, or None when s does not match."""
    units_re = '|'.join(map(re.escape, unit_table))
    m = re.match(
        rf'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None
    # accept ',' as decimal separator
    amount = float(m.group('num').replace(',', '.'))
    return int(amount * unit_table[m.group('unit')])
2158
2159
def parse_filesize(s):
    """Parse a human-readable file size ('10.5MiB', '3 GB', ...) into bytes."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too. The table is generated instead of spelled
    # out: for each magnitude we accept e.g. KiB/KB/kB/Kb/kb and the
    # spelled-out decimal/binary names (kilobytes/kibibytes).
    _UNIT_TABLE = {'B': 1, 'b': 1, 'bytes': 1}
    _DECIMAL_NAMES = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'yotta')
    _BINARY_STEMS = ('ki', 'me', 'gi', 'te', 'pe', 'ex', 'ze', 'yo')
    for exp, (dec, bi) in enumerate(zip(_DECIMAL_NAMES, _BINARY_STEMS), start=1):
        letter = dec[0].upper()
        _UNIT_TABLE[f'{letter}iB'] = 1024 ** exp
        _UNIT_TABLE[f'{letter}B'] = 1000 ** exp
        _UNIT_TABLE[f'{letter.lower()}B'] = 1024 ** exp
        _UNIT_TABLE[f'{letter}b'] = 1000 ** exp
        _UNIT_TABLE[f'{letter.lower()}b'] = 1000 ** exp
        _UNIT_TABLE[f'{dec}bytes'] = 1000 ** exp
        _UNIT_TABLE[f'{bi}bibytes'] = 1024 ** exp

    return lookup_unit_table(_UNIT_TABLE, s)
2229
2230
def parse_count(s):
    """Parse a human count like '15,347 views' or '1.2M' into an int."""
    if s is None:
        return None

    # drop a leading non-numeric prefix ('views: 123' -> '123')
    s = re.sub(r'^[^\d]+\s', '', s).strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
        'b': 1000 ** 3,
        'B': 1000 ** 3,
    }
    parsed = lookup_unit_table(multipliers, s)
    if parsed is not None:
        return parsed

    m = re.match(r'([\d,.]+)(?:$|\s)', s)
    return str_to_int(m.group(1)) if m else None
2258
2259
def parse_resolution(s, *, lenient=False):
    """Extract video dimensions from a string: '1920x1080' -> width+height,
    '720p' -> height only, '4k'/'8k' -> height. Returns {} when nothing matches."""
    if s is None:
        return {}

    if lenient:
        dim_re = r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)'
    else:
        # require non-alphanumeric context around the WxH pair
        dim_re = r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])'
    m = re.search(dim_re, s)
    if m:
        return {
            'width': int(m.group('w')),
            'height': int(m.group('h')),
        }

    m = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(m.group(1)) * 540}

    return {}
2283
2284
def parse_bitrate(s):
    """Parse a bitrate like '500 kbps' into an int (kbps); None otherwise."""
    # plain `str` check, consistent with parse_duration below
    # (compat_str is the Python 3 str)
    if not isinstance(s, str):
        return None
    m = re.search(r'\b(\d+)\s*kbps', s)
    if m:
        return int(m.group(1))
    return None
2291
2292
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    try:
        return MONTH_NAMES.get(lang, MONTH_NAMES['en']).index(name) + 1
    except ValueError:
        return None
2302
2303
def month_by_abbreviation(abbrev):
    """Return the month number for an English three-letter abbreviation,
    or None when it is not recognized."""
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
2312
2313
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # negative lookahead keeps already-escaped entities (named or numeric) intact
    UNESCAPED_AMP_RE = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(UNESCAPED_AMP_RE, '&amp;', xml_str)
2320
2321
def setproctitle(title):
    """Set the process name shown by tools like ps (glibc-based Linux only;
    silently a no-op everywhere else)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode()
    # prctl expects a writable C buffer
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME (NOTE(review): per <sys/prctl.h>; confirm if libc differs)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
2346
2347
def remove_start(s, start):
    """Strip `start` from the beginning of `s` if present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
2350
2351
def remove_end(s, end):
    """Strip `end` from the end of `s` if present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
2354
2355
def remove_quotes(s):
    """Strip one matching pair of surrounding quotes (single or double)."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
2363
2364
def get_domain(url):
    """Extract 'example.com' from a URL-ish string (scheme and 'www.'
    are optional); None when no dotted domain is found."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not m:
        return None
    return m.group('domain')
2368
2369
def url_basename(url):
    """Return the last path component of a URL: '.../a/b.mp4?x' -> 'b.mp4'."""
    # urllib.parse directly (already imported and used in this file) instead
    # of the deprecated compat_urlparse alias
    path = urllib.parse.urlparse(url).path
    return path.strip('/').split('/')[-1]
2373
2374
def base_url(url):
    """Return the URL up to and including the last '/' before any '?', '#' or '&'."""
    # NOTE(review): raises AttributeError when the URL has no path slash;
    # callers rely on getting a match
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
2377
2378
def urljoin(base, path):
    """Join base and path like urllib.parse.urljoin, tolerating bytes/None.

    Returns None unless path is a non-empty str and base is an http(s) or
    protocol-relative URL; absolute/protocol-relative paths are returned as-is.
    (compat_str/compat_urlparse replaced with the plain str / urllib.parse
    the file already uses elsewhere.)
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
2392
2393
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
2397
2398
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
2402
2403
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Return int(v) * invscale // scale, or `default` when conversion fails.
    With get_attr, the named attribute of v is converted instead."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        value = int(v)
    except (ValueError, TypeError, OverflowError):
        return default
    return value * invscale // scale
2411
2412
def str_or_none(v, default=None):
    """Coerce v to str, passing None through as `default`.
    (compat_str is the Python 3 str; use it directly for consistency.)"""
    return default if v is None else str(v)
2415
2416
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, int):
        return int_str
    # plain str (== compat_str on Python 3), consistent with parse_duration
    if isinstance(int_str, str):
        # strip thousands separators and '+' signs before conversion
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
2424
2425
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or `default` on None/conversion failure."""
    if v is None:
        return default
    try:
        value = float(v)
    except (ValueError, TypeError):
        return default
    return value * invscale / scale
2433
2434
def bool_or_none(v, default=None):
    """Return v only when it is an actual bool (not 0/1); else `default`."""
    if isinstance(v, bool):
        return v
    return default
2437
2438
def strip_or_none(v, default=None):
    """v.strip() when v is a str, else `default`.
    (compat_str is the Python 3 str; use it directly for consistency.)"""
    return v.strip() if isinstance(v, str) else default
2441
2442
def url_or_none(url):
    """Return url stripped when it has a supported scheme (http(s), rtmp
    variants, rtsp, mms, ftp(s)) or is protocol-relative; else None.
    (compat_str is the Python 3 str; use it directly for consistency.)"""
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2448
2449
def request_to_url(req):
    """Return the URL of a urllib Request, or `req` itself when already a string."""
    if isinstance(req, compat_urllib_request.Request):
        return req.get_full_url()
    return req
2455
2456
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp (int/float, UTC) or 'YYYYMMDD' string with
    `date_format`; return `default` on any parse/format failure.
    (compat_str is the Python 3 str; use it directly for consistency.)"""
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        # AttributeError covers datetime_object still being None
        return default
2467
2468
2469 def parse_duration(s):
2470 if not isinstance(s, str):
2471 return None
2472 s = s.strip()
2473 if not s:
2474 return None
2475
2476 days, hours, mins, secs, ms = [None] * 5
2477 m = re.match(r'''(?x)
2478 (?P<before_secs>
2479 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2480 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2481 (?P<ms>[.:][0-9]+)?Z?$
2482 ''', s)
2483 if m:
2484 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2485 else:
2486 m = re.match(
2487 r'''(?ix)(?:P?
2488 (?:
2489 [0-9]+\s*y(?:ears?)?,?\s*
2490 )?
2491 (?:
2492 [0-9]+\s*m(?:onths?)?,?\s*
2493 )?
2494 (?:
2495 [0-9]+\s*w(?:eeks?)?,?\s*
2496 )?
2497 (?:
2498 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2499 )?
2500 T)?
2501 (?:
2502 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2503 )?
2504 (?:
2505 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2506 )?
2507 (?:
2508 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2509 )?Z?$''', s)
2510 if m:
2511 days, hours, mins, secs, ms = m.groups()
2512 else:
2513 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2514 if m:
2515 hours, mins = m.groups()
2516 else:
2517 return None
2518
2519 if ms:
2520 ms = ms.replace(':', '.')
2521 return sum(float(part or 0) * mult for part, mult in (
2522 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2523
2524
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension: 'a.mp4' -> 'a.temp.mp4'.

    When expected_real_ext is given and does not match the actual extension,
    `ext` is appended to the whole filename instead, mirroring
    replace_extension() below.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return f'{name}.{ext}{real_ext}'
    # BUG FIX: this branch previously returned the literal '(unknown).{ext}',
    # discarding the original filename entirely
    return f'{filename}.{ext}'
2531
2532
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension: 'a.mp4' -> 'a.webm'. When expected_real_ext is
    given and does not match, `ext` is appended to the whole filename."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return f'{name}.{ext}'
2538
2539
def check_executable(exe, args=[]):
    """Return `exe` if the binary can be spawned from PATH, else False.
    args can be a list of arguments for a short output (like -version)."""
    try:
        proc = Popen([exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate_or_kill()
    except OSError:
        return False
    return exe
2548
2549
def _get_exe_version_output(exe, args, *, to_screen=None):
    """Run `exe args...` and return its combined stdout/stderr as text,
    or False when the executable cannot be spawned."""
    if to_screen:
        to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
    try:
        # STDIN must be redirected too: on UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background
        # https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = proc.communicate_or_kill()
    except OSError:
        return False
    if isinstance(out, bytes):
        # Popen without text mode yields bytes
        out = out.decode('ascii', 'ignore')
    return out
2565
2566
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Search `output` for a version string using `version_re`; return
    `unrecognized` when no version can be found.
    (compat_str is the Python 3 str; use it directly for consistency.)"""
    assert isinstance(output, str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
2576
2577
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    out = _get_exe_version_output(exe, args)
    if not out:
        return False
    return detect_exe_version(out, version_re, unrecognized)
2584
2585
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        # Raised instead of the builtin so callers can tell the source apart
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        # _cache may be shared with another LazyList (see __reversed__/__copy__)
        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # replay what is cached, then pull (and cache) the rest lazily
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        # Map index x to its mirror from the end; None stays None
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Pull only as many items from the iterable as the index requires
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            # probing one element is enough and avoids full evaluation
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # shares the cache with self; only the iteration direction differs
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
2673
2674
class PagedList:
    """Base class for lazily-fetched, page-wise result lists."""

    class IndexError(IndexError):
        pass

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Fetch (and optionally cache) the list of results for one page."""
        results = self._cache.get(pagenum)
        if results is None:
            # pages beyond the known page count are empty by definition
            results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if not entries:
            raise self.IndexError()
        return entries[0]
2713
2714
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        # Iterate pages starting at the one containing `start`
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                # page lies entirely before the requested range
                continue

            # Offsets of the requested range within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                # record where fetching failed so later (cached) lookups do
                # not retry past this page
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
2754
2755
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        # last page to fetch (exclusive), clamped to the known page count
        end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
        # items to drop from the first page so yielding starts at `start`
        skip_elems = start - start_page * self._pagesize
        # total number of items still to yield (None = unbounded)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # this page contains the end of the requested range
                    yield from page_results[:only_more]
                    break
            yield from page_results
2780
2781
def uppercase_escape(s):
    """Decode \\UXXXXXXXX (8 hex digit) escapes embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
2788
2789
def lowercase_escape(s):
    """Decode \\uXXXX (4 hex digit) escapes embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
2796
2797
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # reserved and sub-delimiter characters are left untouched
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return urllib.parse.quote(s, safe_chars)
2801
2802
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    return parts._replace(
        # IDNA-encode the host, percent-escape everything else
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    ).geturl()
2813
2814
def parse_qs(url):
    """Return the query string of `url` parsed into a dict of value lists."""
    # urllib.parse directly (already imported and used in this file) instead
    # of the deprecated compat_* aliases
    return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
2817
2818
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, skipping BOMs, blank lines and
    comment lines starting with '#', ';' or ']'. Closes the file."""
    def fixup(url):
        if not isinstance(url, str):  # compat_str is the Python 3 str
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
2836
2837
def urlencode_postdata(*args, **kargs):
    """urlencode the given data and return it as ascii bytes (a POST body)."""
    # urllib.parse directly instead of the deprecated compat alias
    return urllib.parse.urlencode(*args, **kargs).encode('ascii')
2840
2841
def update_url_query(url, query):
    """Merge the `query` dict into the URL's existing query string."""
    if not query:
        return url
    # urllib.parse directly instead of the deprecated compat aliases
    parsed_url = urllib.parse.urlparse(url)
    qs = urllib.parse.parse_qs(parsed_url.query)
    qs.update(query)
    return urllib.parse.urlunparse(parsed_url._replace(
        query=urllib.parse.urlencode(qs, True)))
2850
2851
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone a urllib Request, optionally overriding url/data/headers/query
    while preserving the HTTP method and timeout."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    # keep the method by picking the matching Request subclass
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
2870
2871
2872 def _multipart_encode_impl(data, boundary):
2873 content_type = 'multipart/form-data; boundary=%s' % boundary
2874
2875 out = b''
2876 for k, v in data.items():
2877 out += b'--' + boundary.encode('ascii') + b'\r\n'
2878 if isinstance(k, compat_str):
2879 k = k.encode()
2880 if isinstance(v, compat_str):
2881 v = v.encode()
2882 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2883 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2884 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2885 if boundary.encode('ascii') in content:
2886 raise ValueError('Boundary overlaps with data')
2887 out += content
2888
2889 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2890
2891 return out, content_type
2892
2893
def multipart_encode(data, boundary=None):
    """Encode a dict to RFC 7578-compliant multipart/form-data.

    data: dict whose keys and values may be str or bytes-like objects.
    boundary: optional explicit boundary string; when omitted, random
    boundaries are tried until one does not collide with the payload.

    Returns (encoded_bytes, content_type).
    Reference: https://tools.ietf.org/html/rfc7578
    """
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # collision: only retry when we chose the boundary ourselves
            if has_specified_boundary:
                raise
            boundary = None
2922
2923
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first suitable value for any of the given key(s);
    falsy values are skipped unless skip_false_values is False."""
    for key in variadic(key_or_keys):
        val = d.get(key)
        if val is not None and (val or not skip_false_values):
            return val
    return default
2929
2930
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn, returning the first result that neither
    raises a common access error nor fails the expected_type check."""
    for fn in funcs:
        try:
            result = fn(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
2940
2941
def try_get(src, getter, expected_type=None):
    """Apply one or more getter callables to src via try_call, swallowing
    common access errors; optionally require the result's type."""
    return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
2944
2945
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a copy of dct keeping only items for which cndn(key, value) holds
    (by default: values that are not None)."""
    return {key: value for key, value in dct.items() if cndn(key, value)}
2948
2949
def merge_dicts(*dicts):
    """Merge dicts left to right; earlier non-None values win, except that an
    empty-string value may be replaced by a later str value."""
    merged = {}
    for current in dicts:
        for k, v in current.items():
            if k not in merged:
                if v is not None:
                    merged[k] = v
            elif merged[k] == '' and isinstance(v, str):
                merged[k] = v
    return merged
2958
2959
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as compat_str, decoding bytes-like input with the
    given encoding and error handler."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2962
2963
# MPAA rating -> equivalent age limit (used by parse_age_limit)
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# TV Parental Guidelines rating -> equivalent age limit (used by parse_age_limit)
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
2981
2982
def parse_age_limit(s):
    """Normalize an age limit (int, 'NN+', MPAA or TV rating) to an int in
    0..21, or None when unrecognized."""
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        return s if 0 <= s <= 21 else None
    if not isinstance(s, str):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    # accept 'TV14', 'TV_14' and 'TV-14' alike
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
2999
3000
def strip_jsonp(code):
    """Strip a JSONP wrapper (e.g. 'callback({...});') and return the bare
    JSON payload; input without a recognizable wrapper passes through the
    substitution unchanged."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
3009
3010
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal into valid JSON text.

    Handles single-quoted strings, unquoted keys, comments, trailing commas,
    hex/octal integers, `undefined`/`void 0` and `new Date(...)` wrappers.
    @param vars  dict of variable name -> replacement JSON text substituted
                 for bare identifiers (read-only; the mutable default is
                 never modified)
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    # (pattern, base) pairs for integer literals that may appear as object keys
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )

    def fix_kv(m):
        # Rewrite a single matched token (string, identifier, number, comment
        # or stray punctuation) into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!'-prefixes and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Re-escape the string body for JSON: double quotes get escaped,
            # escaped single quotes unescaped, line continuations removed,
            # \xNN converted to \u00NN
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # A trailing ':' means the integer was an object key
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        # Bare identifier: quote it so it becomes a JSON string/key
        return '"%s"' % v

    # Unwrap new Date("...") to just the quoted string
    code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3059
3060
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
3069
3070
# Valid values for the "when" argument of postprocessors/hooks
# NOTE(review): presumably listed in chronological order of execution — confirm against YoutubeDL
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
3072
3073
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Recognized output template types; the value, when not None, is the
# default filename suffix/extension associated with that type
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
3091
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Template for a regex matching %-style format specifiers; format() it with
# (0) a pattern for the mapping key and (1) a pattern for the conversion type
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
    '''


# All conversion type characters supported by %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3110
3111
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    # Truncate so the result (text + ellipses) is exactly `length` chars
    return s[:length - len(ellipses)] + ellipses
3120
3121
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints;
    raises ValueError on non-numeric components."""
    return tuple(map(int, re.split(r'[-.]', v)))
3124
3125
def is_outdated_version(version, limit, assume_new=True):
    """Return whether `version` is older than `limit`; when the version is
    missing or unparsable, assume it is new unless assume_new=False."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
3133
3134
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Imported lazily — presumably to avoid a circular import with .update; confirm
    from .update import is_non_updateable

    return not is_non_updateable()
3141
3142
def args_to_str(args):
    """Build a short, shell-quoted string representation of a subprocess
    command (for display/logging)."""
    return ' '.join(map(compat_shlex_quote, args))
3146
3147
def error_to_compat_str(err):
    """Return the message of an exception as a plain string."""
    return '%s' % err
3150
3151
def error_to_str(err):
    """Format an exception as 'TypeName: message'."""
    return '{}: {}'.format(type(err).__name__, err)
3154
3155
def mimetype2ext(mt):
    """Convert a MIME type to the preferred file extension.

    @param mt  MIME type string, possibly with parameters
               (e.g. 'audio/mpeg; codecs="mp3"'); may be None
    @returns   The mapped extension, or a best-effort guess derived from the
               subtype, or None when mt is None
    """
    if mt is None:
        return None

    # Strip any parameters ('; charset=...', '; codecs=...'); only the
    # type/subtype part is used for matching. (The original code bound the
    # parameters to an unused local variable.)
    mt = mt.partition(';')[0].strip()

    # 1. Exact full-type matches take precedence
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }

    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    # 2. Match on the subtype alone, case-insensitively
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }

    _, _, subtype = mt.rpartition('/')
    ext = SUBTYPE_MAP.get(subtype.lower())
    if ext is not None:
        return ext

    # 3. Match on the structured-syntax '+suffix' (e.g. 'vnd.foo+json' -> json)
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }

    _, _, suffix = subtype.partition('+')
    ext = SUFFIX_MAP.get(suffix)
    if ext is not None:
        return ext

    # 4. Fall back to the subtype itself, with '+' converted to '.'
    return subtype.replace('+', '.')
3218
3219
def ext2mimetype(ext_or_url):
    """Guess the MIME type for a file extension or a URL/filename;
    returns None for empty input or unknown extensions."""
    if not ext_or_url:
        return None
    # A bare extension is turned into a dummy filename for guess_type
    name = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    return mimetypes.guess_type(name)[0]
3226
3227
def parse_codecs(codecs_str):
    """Parse an RFC 6381 'codecs' string into a dict with 'vcodec', 'acodec',
    'dynamic_range' and (when present) 'scodec' keys; returns {} when nothing
    could be identified."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, scodec, hdr = None, None, None, None
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        # '0' chars are stripped so e.g. 'avc01' matches 'avc1'
        codec = parts[0].replace('0', '')
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                     'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            # Only the first video codec found is kept
            if not vcodec:
                # For vp9/av1/hvc1, keep at most the first 4 dotted parts
                vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
                if codec in ('dvh1', 'dvhe'):
                    hdr = 'DV'
                elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
                    hdr = 'HDR10'
            elif full_codec.replace('0', '').startswith('vp9.2'):
                hdr = 'HDR10'
        elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        elif codec in ('stpp', 'wvtt',):
            if not scodec:
                scodec = full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')
    if vcodec or acodec or scodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
            **({'scodec': scodec} if scodec is not None else {}),
        }
    # Nothing recognized: with exactly two entries, assume video + audio order
    elif len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
3269
3270
def urlhandle_detect_ext(url_handle):
    """Guess the file extension of a response, preferring the filename from
    the Content-Disposition header and falling back to the Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
3283
3284
def encode_data_uri(data, mime_type):
    """Build a base64 data: URI from bytes and a MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
3287
3288
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer age limit configured, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
3297
3298
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # BOM signatures, longest/most specific first so UTF-32 wins over UTF-16
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]

    encoding = 'utf-8'
    remainder = first_bytes
    for bom, bom_encoding in BOMS:
        # Repeated BOMs of the same kind are all stripped
        while remainder.startswith(bom):
            encoding = bom_encoding
            remainder = remainder[len(bom):]

    return re.match(r'^\s*<', remainder.decode(encoding, 'replace'))
3316
3317
def determine_protocol(info_dict):
    """Return the download protocol for info_dict: the explicit 'protocol'
    field if set, otherwise a guess from the URL prefix/extension, falling
    back to the URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    # Fall back to the raw URL scheme (http, https, ftp, ...)
    return compat_urllib_parse_urlparse(url).scheme
3338
3339
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned """
    def width(string):
        # Visible width: terminal escape sequences and tabs don't count
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        # Widest cell per column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only the columns whose filter entry is truthy (missing -> keep)
        return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]

    # With hide_empty, columns whose every data cell is empty are dropped
    max_lens = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, max_lens)
    data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        # Insert a delimiter row under the header
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                # '\t' marks right alignment: pad before the text instead of after
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
3370
3371
3372 def _match_one(filter_part, dct, incomplete):
3373 # TODO: Generalize code with YoutubeDL._build_format_filter
3374 STRING_OPERATORS = {
3375 '*=': operator.contains,
3376 '^=': lambda attr, value: attr.startswith(value),
3377 '$=': lambda attr, value: attr.endswith(value),
3378 '~=': lambda attr, value: re.search(value, attr),
3379 }
3380 COMPARISON_OPERATORS = {
3381 **STRING_OPERATORS,
3382 '<=': operator.le, # "<=" must be defined above "<"
3383 '<': operator.lt,
3384 '>=': operator.ge,
3385 '>': operator.gt,
3386 '=': operator.eq,
3387 }
3388
3389 if isinstance(incomplete, bool):
3390 is_incomplete = lambda _: incomplete
3391 else:
3392 is_incomplete = lambda k: k in incomplete
3393
3394 operator_rex = re.compile(r'''(?x)\s*
3395 (?P<key>[a-z_]+)
3396 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3397 (?:
3398 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3399 (?P<strval>.+?)
3400 )
3401 \s*$
3402 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3403 m = operator_rex.search(filter_part)
3404 if m:
3405 m = m.groupdict()
3406 unnegated_op = COMPARISON_OPERATORS[m['op']]
3407 if m['negation']:
3408 op = lambda attr, value: not unnegated_op(attr, value)
3409 else:
3410 op = unnegated_op
3411 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3412 if m['quote']:
3413 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3414 actual_value = dct.get(m['key'])
3415 numeric_comparison = None
3416 if isinstance(actual_value, (int, float)):
3417 # If the original field is a string and matching comparisonvalue is
3418 # a number we should respect the origin of the original field
3419 # and process comparison value as a string (see
3420 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3421 try:
3422 numeric_comparison = int(comparison_value)
3423 except ValueError:
3424 numeric_comparison = parse_filesize(comparison_value)
3425 if numeric_comparison is None:
3426 numeric_comparison = parse_filesize(f'{comparison_value}B')
3427 if numeric_comparison is None:
3428 numeric_comparison = parse_duration(comparison_value)
3429 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3430 raise ValueError('Operator %s only supports string values!' % m['op'])
3431 if actual_value is None:
3432 return is_incomplete(m['key']) or m['none_inclusive']
3433 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3434
3435 UNARY_OPERATORS = {
3436 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3437 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3438 }
3439 operator_rex = re.compile(r'''(?x)\s*
3440 (?P<op>%s)\s*(?P<key>[a-z_]+)
3441 \s*$
3442 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3443 m = operator_rex.search(filter_part)
3444 if m:
3445 op = UNARY_OPERATORS[m.group('op')]
3446 actual_value = dct.get(m.group('key'))
3447 if is_incomplete(m.group('key')) and actual_value is None:
3448 return True
3449 return op(actual_value)
3450
3451 raise ValueError('Invalid filter part %r' % filter_part)
3452
3453
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.
    @returns Whether the filter passes
    @param incomplete Set of keys that is expected to be missing from dct.
    Can be True/False to indicate all/none of the keys may be missing.
    All conditions on incomplete keys pass if the key is missing
    """
    # Conditions are separated by unescaped '&'; every one must hold
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct, incomplete):
            return False
    return True
3464
3465
def match_filter_func(filters):
    """Build a match-filter callable from one or more filter strings, or
    return None when no filters are given.

    The returned function takes (info_dict, incomplete) and returns None when
    the video passes, NO_DEFAULT to request interactive confirmation (enabled
    by a lone '-' filter), or a string message explaining the skip.
    """
    if not filters:
        return None
    filters = set(variadic(filters))

    # A lone '-' enables interactive mode for videos that pass the filters
    interactive = '-' in filters
    if interactive:
        filters.remove('-')

    def _match_func(info_dict, incomplete=False):
        # The video passes if any one filter string matches
        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            return NO_DEFAULT if interactive and not incomplete else None
        else:
            video_title = info_dict.get('title') or info_dict.get('id') or 'video'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
    return _match_func
3483
3484
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Supports plain offsets ('12.3', '12.3s') and clock times
    ('H:MM:SS', 'H:MM:SS.mmm', 'H:MM:SS:frac'); returns None for empty or
    unrecognized input.
    """
    if not time_expr:
        return

    mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        # A second ':' in the seconds field is treated as a decimal point
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3496
3497
def srt_subtitles_timecode(seconds):
    """Format seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3500
3501
def ass_subtitles_timecode(seconds):
    """Format seconds as an ASS timecode 'H:MM:SS.cc' (centiseconds)."""
    time = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3505
3506
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError when the document contains no paragraph elements
    '''
    # Old TTAF namespaces are rewritten (as raw bytes) to their modern TTML
    # equivalents before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated to SRT-style markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Shared between the style-resolution pass below and the element parser
    styles = {}
    default_style = {}

    class TTMLPElementParser:
        # NOTE(review): these are class-level (shared) attributes; the lists
        # rely on balanced push/pop within a single parse — consider
        # per-instance initialization
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                # Inline tts:* attributes override referenced styles
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and feed it through the styling-aware parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat while a parent style has not been seen yet
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on <body> or <div> becomes the document-wide default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
3669
3670
def cli_option(params, command_option, param, separator=None):
    """Build command-line argument(s) for an option whose value comes from
    params[param]; returns [] when the value is unset (None)."""
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    # With a separator, option and value are joined into a single argument
    return [f'{command_option}{separator}{value}']
3676
3677
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build command-line argument(s) for a boolean option: emits true_value
    or false_value as the option's argument, or nothing when the param is
    unset (None)."""
    param = params.get(param)
    assert param in (True, False, None)
    # Reuses cli_option by looking the boolean up in a {True/False: text} dict
    return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3682
3683
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare command-line flag when params[param] equals expected_value,
    otherwise nothing."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
3686
3687
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select extra CLI args from argdict for the first matching key (list).

    @param argdict   dict of key -> list of args; a plain list/tuple is
                     accepted for backward compatibility
    @param keys      list of keys (or key-tuples) tried in order; all args of
                     the first key-group with any match are concatenated
    @param default   returned when argdict is None or nothing matches
    @param use_compat  when argdict is a legacy list, return it as-is
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        else:
            argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Keys are matched case-insensitively (argdict keys are lowercase)
        arg_list = list(filter(
            lambda x: x is not None,
            [argdict.get(key.lower()) for key in variadic(key_list)]))
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return default
3706
3707
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve configuration args for an executable: builds the lookup keys
    ('exe' or 'main_key+exe', optionally suffixed) and delegates to
    cli_configuration_args."""
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{k}' for k in (keys or [''])]
    # The bare root key is only present when keys contained '' — then also
    # try the (main_key, exe) pair and the global 'default' entry
    if root_key in keys:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    else:
        use_compat = False
    return cli_configuration_args(argdict, keys, default, use_compat)
3719
3720
class ISO639Utils:
    """Conversion between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so e.g. 'en-US' works
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
3924
3925
class ISO3166Utils:
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive: the code is uppercased before lookup
        return cls._country_map.get(code.upper())
4187
4188
class GeoUtils:
    """Helpers for generating plausible per-country IPv4 addresses (geo-bypass)."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (str) inside the given block.

        `code_or_block` is either a two-letter country code (looked up in
        `_country_ip_map`; returns None if unknown) or an explicit
        "a.b.c.d/prefixlen" CIDR string.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Randomize only the host bits: addr_min is the network address,
        # addr_max has all host bits set
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4447
4448
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request proxy set via the
    `Ytdl-request-proxy` header (removed before the request is sent)."""

    def __init__(self, proxies=None):
        # Set default handlers. `scheme` (not `type`) as loop variable to
        # avoid shadowing the builtin `type`; the lambda still forwards it
        # as the `type` argument expected by proxy_open.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Open `req` through `proxy`, or directly for '__noproxy__'/SOCKS proxies."""
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the SOCKS wrapping of the socket themselves
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
4472
4473
4474 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4475 # released into Public Domain
4476 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4477
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n > 0:
        # Minimal big-endian representation (no leading zero bytes)
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        # Mirrors the original behaviour: non-positive n yields a single zero byte
        s = b'\000'
    # Front-pad so the length is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = b'\000' * (blocksize - len(s) % blocksize) + s
    return s
4506
4507
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Big-endian interpretation; empty input yields 0, matching the original loop
    return int.from_bytes(s, 'big')
4523
4524
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is the little-endian integer value of `data`
    reversed_data = data[::-1]
    payload = int(binascii.hexlify(reversed_data), 16)
    encrypted = pow(payload, exponent, modulus)
    return format(encrypted, 'x')
4540
4541
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # PKCS#1 v1.5 (RFC 8017): EB = 00 || 02 || PS || 00 || D, where PS must
    # consist of NONZERO pseudo-random octets -- a zero octet would make the
    # decoder terminate the padding string early. Was randint(0, 254), which
    # could emit 0; use the full nonzero range 1..255 instead.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
4555
4556
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer `num` in base `n` using `table` as digits."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
4573
4574
def decode_packed_codes(code):
    """Decode "packed" (p,a,c,k,e,d-style) obfuscated JavaScript source."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-N token to its replacement; an empty symbol maps to itself
    symbol_table = {}
    for index in range(count):
        base_n_index = encode_base_n(index, base)
        symbol_table[base_n_index] = symbols[index] or base_n_index

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
4591
4592
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift` positions
    (wrapping around); characters outside `alphabet` are left untouched."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        if ch not in alphabet:
            return ch
        return alphabet[(alphabet.index(ch) + shift) % size]

    return ''.join(map(rotate, s))


def rot47(s):
    """Apply the self-inverse ROT47 cipher over the printable ASCII range."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
4604
4605
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list ('KEY=val,KEY2="quoted"') into a dict."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        # Strip surrounding quotes from quoted values
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
4613
4614
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, like JavaScript's `>>>`."""
    if val < 0:
        val += 0x100000000
    return val >> n
4617
4618
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG into (width, height, pixels), pixels being a list of rows
    of byte values.

    NOTE(review): assumes a non-interlaced image with 3 bytes per pixel
    (stride = width * 3, no alpha) -- confirm inputs match before reuse.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the PNG signature and that the first chunk is IHDR
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise OSError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk: 4-byte length, 4-byte type, payload, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (verified above); width/height are its first fields
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split over several IDAT chunks; concatenate before inflating
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise OSError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per scanline (3 bytes per pixel, see docstring note)
    stride = width * 3
    pixels = []

    # Fetch an already-reconstructed sample by absolute byte index
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed with a 1-byte filter type
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # "left" is the corresponding byte of the previous pixel (3 bytes back),
            # available from the second pixel of the row onward
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (https://www.w3.org/TR/PNG/#9Filters)
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Choose the predictor closest to p (ties favour a, then b)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
4724
4725
def write_xattr(path, key, value):
    """Set the extended attribute `key` to the bytes `value` on file `path`.

    Tries, in order: NTFS Alternate Data Streams (Windows), the python
    xattr/pyxattr modules, then the `setfattr`/`xattr` command-line tools.
    Raises XAttrMetadataError on failure and XAttrUnavailableError when no
    method is available.
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            # "path:key" addresses the named ADS of the file
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use xattrs/pyxattrs modules
    from .dependencies import xattr

    setxattr = None
    if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The CLI tools take the value as a text argument
    value = value.decode()
    try:
        p = Popen(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    stderr = p.communicate_or_kill()[1].decode('utf-8', 'replace')
    if p.returncode:
        raise XAttrMetadataError(p.returncode, stderr)
4777
4778
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the (stringified) parts
    of a random date between 1950-01-01 and 1995-12-31."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    chosen = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
4789
4790
# Templates for internet shortcut files, which are plain text files.

# `.url` shortcut ([InternetShortcut] INI section)
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# `.webloc` shortcut (Apple XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# `.desktop` shortcut (freedesktop.org Desktop Entry of Type=Link)
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Lookup table of the templates above by short format name
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
4822
4823
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): the port is dropped only when it is literally 80, even for
    # schemes whose default port differs (e.g. https) -- confirm this is intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return urllib.parse.urlunparse(
        (iri_parts.scheme,
            net_location,

            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
4866
4867
def to_high_limit_path(path):
    """On Windows, prefix the absolute path with \\\\?\\ to bypass the MAX_PATH
    limit; on other platforms return `path` unchanged."""
    if sys.platform not in ['win32', 'cygwin']:
        return path
    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)
4874
4875
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format the value at `field` of `obj` with `template`; return `default`
    when the value is one of `ignore`. `func`, if given, transforms the value first."""
    val = traverse_obj(obj, *variadic(field))
    if val in ignore:
        return default
    if func:
        val = func(val)
    return template % val
4881
4882
def clean_podcast_url(url):
    """Strip known podcast tracking/measurement redirect prefixes from `url`."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
4898
4899
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Generate a random RFC 4122 version-4 UUID string.

    Bugfix: the variant nibble (the 'y' position) must be one of 8, 9, a, b
    per RFC 4122 section 4.4; previously it was drawn from the full hex table,
    producing invalid UUIDs ~75% of the time.
    """
    return re.sub(
        r'[xy]',
        lambda m: random.choice('89ab') if m.group(0) == 'y' else _HEX_TABLE[random.randint(0, 15)],
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
4905
4906
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` (if any) when it does not exist.

    Returns True on success or when nothing needed to be done. On OSError,
    reports through `to_screen` (if callable) and returns False.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except OSError as err:
        # Bugfix: was `if callable(to_screen) is not None:`, which is always
        # truthy (a bool is never None) and so crashed with a TypeError when
        # to_screen was left as None and directory creation failed
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
4917
4918
def get_executable_path():
    """Return the directory containing the running yt-dlp executable/script."""
    from .update import _get_variant_and_executable_path

    exe_path = _get_variant_and_executable_path()[1]
    return os.path.dirname(os.path.abspath(exe_path))
4923
4924
def load_plugins(name, suffix, namespace):
    """Load `ytdlp_plugins/<name>/__init__.py` and register every attribute
    ending in `suffix` into `namespace` (skipping names already present).

    Returns a dict of the newly registered classes. Missing plugin files are
    silently ignored.
    """
    classes = {}
    with contextlib.suppress(FileNotFoundError):
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        # Bugfix (hygiene): the loop previously rebound the `name` parameter,
        # shadowing the plugin-package name for the rest of the function
        for member_name in dir(plugins):
            if member_name in namespace:
                continue
            if not member_name.endswith(suffix):
                continue
            klass = getattr(plugins, member_name)
            classes[member_name] = namespace[member_name] = klass
    return classes
4941
4942
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a:
                            - None:     Do nothing
                            - string:   A dictionary key
                            - int:      An index into a list
                            - tuple:    A list of keys all of which will be traversed
                            - Ellipsis: Fetch all values in the object
                            - Function: Takes the key and value as arguments
                                        and returns whether the key matches or not
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    # `depth` (nonlocal, reset per path below) counts how many branching
    # levels (Ellipsis / tuple / filter-function keys) were traversed, so the
    # caller loop knows how many nesting levels to flatten afterwards
    def _traverse_obj(obj, path, _current_depth=0):
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if None in (key, obj):
                return obj
            if isinstance(key, (list, tuple)):
                # A tuple key branches into every sub-key, then behaves like Ellipsis
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter-function key: keep items for which key(k, v) is truthy
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied string keys to int or slice
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branching occurred: flatten the nested result lists and
                # filter out non-matching/missing values
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
5041
5042
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated thin wrapper around traverse_obj()."""
    write_string(
        'DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
        'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
    return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5047
5048
def get_first(obj, keys, **kwargs):
    """Return the first value found for `keys` across all items of `obj`."""
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, get_all=False, **kwargs)
5051
5052
def variadic(x, allowed_types=(str, bytes, dict)):
    """Return `x` unchanged if it is a non-atomic iterable, else wrap it in a tuple.

    `allowed_types` are treated as atomic even though they are iterable.
    """
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
5055
5056
def decode_base(value, digits):
    """Convert the base-N string `value` to an int, using `digits` as the
    digit alphabet (base = len(digits)). Empty input yields 0."""
    # This will convert given base-x string to scalar (long or int)
    table = {char: index for index, char in enumerate(digits)}
    result = 0
    base = len(digits)
    # Loop variable renamed: it previously shadowed the builtin `chr`
    for char in value:
        result *= base
        result += table[char]
    return result
5066
5067
def time_seconds(**kwargs):
    """Current UNIX timestamp; `kwargs` are timedelta arguments defining the
    timezone offset used for "now" (the timestamp itself is offset-independent)."""
    tz = datetime.timezone(datetime.timedelta(**kwargs))
    return datetime.datetime.now(tz).timestamp()
5071
5072
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create an HS256-signed JWT (bytes, JWS Compact Serialization).

    @param payload_data  JSON-serializable claims
    @param key           shared secret (str) for the HMAC-SHA256 signature
    @param headers       optional extra/overriding JOSE header fields
    """
    # Bugfix (hygiene): default was the mutable `headers={}`; None avoids the
    # shared-mutable-default pitfall with identical behavior for callers
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
    return token
5090
5091
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode and return the payload of a JWT WITHOUT verifying its signature."""
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # Bugfix: standard JWTs use base64url WITHOUT padding (RFC 7515), which
    # made urlsafe_b64decode raise "Incorrect padding". Appending '===' always
    # provides enough padding; the decoder discards any excess.
    payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
    return payload_data
5097
5098
# None means "not applicable"; on Windows it starts False until VT mode is enabled
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None


@functools.cache
def supports_terminal_sequences(stream):
    """Whether ANSI/VT escape sequences may be written to `stream`."""
    vt_unavailable = (
        not WINDOWS_VT_MODE if compat_os_name == 'nt'
        else not os.getenv('TERM'))
    if vt_unavailable:
        return False
    try:
        return stream.isatty()
    except BaseException:
        return False
5113
5114
def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
    """Try to enable virtual-terminal (ANSI escape) processing on Windows consoles."""
    # Requires Windows 10 build 10586 or newer
    if get_windows_version() < (10, 0, 10586):
        return
    global WINDOWS_VT_MODE
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    try:
        # NOTE(review): spawning an (empty) shell command appears to be the
        # workaround that switches the console into VT mode -- see the TODO
        # above for the proper Win32 API approach
        subprocess.Popen('', shell=True, startupinfo=startupinfo).wait()
    except Exception:
        return

    WINDOWS_VT_MODE = True
    # Invalidate cached results now that VT mode has changed
    supports_terminal_sequences.cache_clear()
5128
5129
# Matches ESC [ ... m (SGR color/style sequences)
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with ANSI SGR escape sequences stripped."""
    cleaned = _terminal_sequences_re.sub('', string)
    return cleaned
5135
5136
def number_of_digits(number):
    """Length of the decimal representation of `number` (sign included)."""
    decimal = '%d' % number
    return len(decimal)
5139
5140
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the string forms of all truthy values with `delim`.

    When `from_dict` is given, `values` are first looked up as its keys.
    """
    if from_dict is not None:
        values = map(from_dict.get, values)
    return delim.join(str(value) for value in values if value)
5145
5146
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    This function is useful with video services that scale the provided thumbnails on demand
    """
    _keys = ('width', 'height')
    max_dimensions = max(
        (tuple(fmt.get(k) or 0 for k in _keys) for fmt in formats),
        default=(0, 0))
    if not max_dimensions[0]:
        return thumbnails
    new_width = str(max_dimensions[0])
    scaled = []
    for thumbnail in thumbnails:
        scaled.append(merge_dicts(
            {'url': re.sub(url_width_re, new_width, thumbnail['url'])},
            dict(zip(_keys, max_dimensions)), thumbnail))
    return scaled
5167
5168
def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
    if range:
        match = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
        if match:
            # (start, end, total) - end and total are optional in the header
            return int(match.group(1)), int_or_none(match.group(2)), int_or_none(match.group(3))
    return None, None, None
5177
5178
def read_stdin(what):
    """Announce that `what` will be read from STDIN, then return the stdin stream."""
    if compat_os_name == 'nt':
        eof = 'Ctrl+Z'
    else:
        eof = 'Ctrl+D'
    write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    return sys.stdin
5183
5184
class Config:
    """A (possibly nested) set of command-line arguments.

    Each nested `--config-locations` file becomes a child Config; already
    loaded paths are tracked to break inclusion cycles.
    """
    # raw args this instance was initialized with (None until init())
    own_args = None
    # same args, kept after they have been parsed by the option parser
    parsed_args = None
    # file the args were read from, if any
    filename = None
    __initialized = False

    def __init__(self, parser, label=None):
        self.parser, self.label = parser, label
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """Parse `args` (read from `filename`, if given) and recursively load
        any configs referenced via `--config-locations`.

        Returns False if `filename` was already loaded (cycle protection),
        True otherwise. May only be called once per instance.
        """
        assert not self.__initialized
        directory = ''
        if filename:
            location = os.path.realpath(filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.own_args, self.__initialized = args, True
        opts, _ = self.parser.parse_known_args(args)
        self.parsed_args, self.filename = args, filename

        for location in opts.config_locations or []:
            if location == '-':
                # "-" means: read extra options interactively from stdin
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            # locations in a config file are resolved relative to that file
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        # render own args (with credentials hidden) plus indented child configs
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=[]):
        """Read `filename` and shlex-split its contents into an args list.

        Returns `default` if the file cannot be opened.
        """
        try:
            optionf = open(filename)
        except OSError:
            return default  # silently skip if file is not present
        # context manager guarantees the handle is closed even if parsing fails
        with optionf:
            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
            contents = optionf.read()
            return shlex.split(contents, comments=True)

    @staticmethod
    def hide_login_info(opts):
        """Return a copy of `opts` with values of credential options replaced
        by the string 'PRIVATE' (both "--opt=value" and "--opt value" forms)."""
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            else:
                return o

        opts = list(map(_scrub_eq, opts))
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """Create a child Config (sharing the loaded-paths set) and keep it
        if its init() succeeds."""
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        """All args, child configs first (later configs take precedence)."""
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        return self.parser.parse_args(self.all_args)
5279
5280
class WebSocketsWrapper():
    """Wraps websockets module to use in non-async scopes"""
    # the live connection object, set by __enter__
    pool = None

    def __init__(self, url, headers=None, connect=True):
        self.loop = asyncio.new_event_loop()
        # XXX: "loop" is deprecated
        self.conn = websockets.connect(
            url, extra_headers=headers, ping_interval=None,
            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
        if connect:
            self.__enter__()
        # ensure the connection is torn down even if the caller never exits
        atexit.register(self.__exit__, None, None, None)

    def __enter__(self):
        if not self.pool:
            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
        return self

    def send(self, *args):
        """Send a message over the websocket (blocking)."""
        self.run_with_loop(self.pool.send(*args), self.loop)

    def recv(self, *args):
        """Receive a message from the websocket (blocking)."""
        return self.run_with_loop(self.pool.recv(*args), self.loop)

    def __exit__(self, type, value, traceback):
        try:
            return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
        finally:
            # Cancel leftover tasks *before* closing the loop:
            # _cancel_all_tasks calls loop.run_until_complete, which raises
            # RuntimeError once the loop is closed. This mirrors the shutdown
            # order used by asyncio.run (cancel tasks, then close the loop).
            self._cancel_all_tasks(self.loop)
            self.loop.close()

    # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
    # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
    @staticmethod
    def run_with_loop(main, loop):
        """Run coroutine `main` to completion on `loop` and return its result,
        shutting down async generators (and the default executor, where
        available) afterwards. Raises ValueError for non-coroutine input."""
        if not asyncio.iscoroutine(main):
            raise ValueError(f'a coroutine was expected, got {main!r}')

        try:
            return loop.run_until_complete(main)
        finally:
            loop.run_until_complete(loop.shutdown_asyncgens())
            if hasattr(loop, 'shutdown_default_executor'):
                loop.run_until_complete(loop.shutdown_default_executor())

    @staticmethod
    def _cancel_all_tasks(loop):
        """Cancel every pending task on `loop` and surface any exception that
        was raised during shutdown via the loop's exception handler."""
        to_cancel = asyncio.all_tasks(loop)

        if not to_cancel:
            return

        for task in to_cancel:
            task.cancel()

        # XXX: "loop" is removed in python 3.10+
        loop.run_until_complete(
            asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))

        for task in to_cancel:
            if task.cancelled():
                continue
            if task.exception() is not None:
                loop.call_exception_handler({
                    'message': 'unhandled exception during asyncio.run() shutdown',
                    'exception': task.exception(),
                    'task': task,
                })
5350
5351
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    merged = {}
    for headers in dicts:
        for key, value in headers.items():
            # title-casing normalizes e.g. 'content-type' -> 'Content-Type'
            merged[key.title()] = value
    return merged
5355
5356
class classproperty:
    """classmethod(property(func)) that works in py < 3.9"""

    def __init__(self, func):
        self.func = func
        functools.update_wrapper(self, func)

    def __get__(self, instance, owner):
        # always evaluate against the class, regardless of how it was accessed
        return self.func(owner)
5366
5367
5368 class Namespace:
5369 """Immutable namespace"""
5370
5371 def __init__(self, **kwargs):
5372 self._dict = kwargs
5373
5374 def __getattr__(self, attr):
5375 return self._dict[attr]
5376
5377 def __contains__(self, item):
5378 return item in self._dict.values()
5379
5380 def __iter__(self):
5381 return iter(self._dict.items())
5382
5383 def __repr__(self):
5384 return f'{type(self).__name__}({", ".join(f"{k}={v}" for k, v in self)})'
5385
5386
# Deprecated
# Availability flags mirroring the optional-dependency imports above;
# kept for backward compatibility with code that still reads them
has_certifi = bool(certifi)
has_websockets = bool(websockets)