yt_dlp/utils.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import asyncio
   7 import atexit
   8 import base64
   9 import binascii
  10 import calendar
  11 import codecs
  12 import collections
  13 import contextlib
  14 import ctypes
  15 import datetime
  16 import email.utils
  17 import email.header
  18 import errno
  19 import functools
  20 import gzip
  21 import hashlib
  22 import hmac
  23 import importlib.util
  24 import io
  25 import itertools
  26 import json
  27 import locale
  28 import math
  29 import operator
  30 import os
  31 import platform
  32 import random
  33 import re
  34 import socket
  35 import ssl
  36 import subprocess
  37 import sys
  38 import tempfile
  39 import time
  40 import traceback
  41 import xml.etree.ElementTree
  42 import zlib
  43 import mimetypes
  44
  45 from .compat import (
  46     compat_HTMLParseError,
  47     compat_HTMLParser,
  48     compat_HTTPError,
  49     compat_basestring,
  50     compat_brotli,
  51     compat_chr,
  52     compat_cookiejar,
  53     compat_etree_fromstring,
  54     compat_expanduser,
  55     compat_html_entities,
  56     compat_html_entities_html5,
  57     compat_http_client,
  58     compat_integer_types,
  59     compat_numeric_types,
  60     compat_kwargs,
  61     compat_os_name,
  62     compat_parse_qs,
  63     compat_shlex_split,
  64     compat_shlex_quote,
  65     compat_str,
  66     compat_struct_pack,
  67     compat_struct_unpack,
  68     compat_urllib_error,
  69     compat_urllib_parse,
  70     compat_urllib_parse_urlencode,
  71     compat_urllib_parse_urlparse,
  72     compat_urllib_parse_urlunparse,
  73     compat_urllib_parse_quote,
  74     compat_urllib_parse_quote_plus,
  75     compat_urllib_parse_unquote_plus,
  76     compat_urllib_request,
  77     compat_urlparse,
  78     compat_websockets,
  79     compat_xpath,
  80 )
  81
  82 from .socks import (
  83     ProxyType,
  84     sockssocket,
  85 )
  86
  87 try:
  88     import certifi
  89     has_certifi = True
  90 except ImportError:
  91     has_certifi = False
  92
  93
  94 def register_socks_protocols():
  95     # "Register" SOCKS protocols
  96     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  97     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  98     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  99         if scheme not in compat_urlparse.uses_netloc:
 100             compat_urlparse.uses_netloc.append(scheme)
 101
 102
 103 # This is not clearly defined otherwise
 104 compiled_regex_type = type(re.compile(''))
 105
 106
 107 def random_user_agent():
 108     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
 109     _CHROME_VERSIONS = (
 110         '90.0.4430.212',
 111         '90.0.4430.24',
 112         '90.0.4430.70',
 113         '90.0.4430.72',
 114         '90.0.4430.85',
 115         '90.0.4430.93',
 116         '91.0.4472.101',
 117         '91.0.4472.106',
 118         '91.0.4472.114',
 119         '91.0.4472.124',
 120         '91.0.4472.164',
 121         '91.0.4472.19',
 122         '91.0.4472.77',
 123         '92.0.4515.107',
 124         '92.0.4515.115',
 125         '92.0.4515.131',
 126         '92.0.4515.159',
 127         '92.0.4515.43',
 128         '93.0.4556.0',
 129         '93.0.4577.15',
 130         '93.0.4577.63',
 131         '93.0.4577.82',
 132         '94.0.4606.41',
 133         '94.0.4606.54',
 134         '94.0.4606.61',
 135         '94.0.4606.71',
 136         '94.0.4606.81',
 137         '94.0.4606.85',
 138         '95.0.4638.17',
 139         '95.0.4638.50',
 140         '95.0.4638.54',
 141         '95.0.4638.69',
 142         '95.0.4638.74',
 143         '96.0.4664.18',
 144         '96.0.4664.45',
 145         '96.0.4664.55',
 146         '96.0.4664.93',
 147         '97.0.4692.20',
 148     )
 149     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 150
 151
 152 SUPPORTED_ENCODINGS = [
 153     'gzip', 'deflate'
 154 ]
 155 if compat_brotli:
 156     SUPPORTED_ENCODINGS.append('br')
 157
 158 std_headers = {
 159     'User-Agent': random_user_agent(),
 160     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 161     'Accept-Language': 'en-us,en;q=0.5',
 162     'Sec-Fetch-Mode': 'navigate',
 163 }
 164
 165
 166 USER_AGENTS = {
 167     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
 168 }
 169
 170
 171 NO_DEFAULT = object()
 172
 173 ENGLISH_MONTH_NAMES = [
 174     'January', 'February', 'March', 'April', 'May', 'June',
 175     'July', 'August', 'September', 'October', 'November', 'December']
 176
 177 MONTH_NAMES = {
 178     'en': ENGLISH_MONTH_NAMES,
 179     'fr': [
 180         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
 181         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 182 }
 183
 184 KNOWN_EXTENSIONS = (
 185     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
 186     'flv', 'f4v', 'f4a', 'f4b',
 187     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
 188     'mkv', 'mka', 'mk3d',
 189     'avi', 'divx',
 190     'mov',
 191     'asf', 'wmv', 'wma',
 192     '3gp', '3g2',
 193     'mp3',
 194     'flac',
 195     'ape',
 196     'wav',
 197     'f4f', 'f4m', 'm3u8', 'smil')
 198
 199 # needed for sanitizing filenames in restricted mode
 200 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
 201                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
 202                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 203
 204 DATE_FORMATS = (
 205     '%d %B %Y',
 206     '%d %b %Y',
 207     '%B %d %Y',
 208     '%B %dst %Y',
 209     '%B %dnd %Y',
 210     '%B %drd %Y',
 211     '%B %dth %Y',
 212     '%b %d %Y',
 213     '%b %dst %Y',
 214     '%b %dnd %Y',
 215     '%b %drd %Y',
 216     '%b %dth %Y',
 217     '%b %dst %Y %I:%M',
 218     '%b %dnd %Y %I:%M',
 219     '%b %drd %Y %I:%M',
 220     '%b %dth %Y %I:%M',
 221     '%Y %m %d',
 222     '%Y-%m-%d',
 223     '%Y.%m.%d.',
 224     '%Y/%m/%d',
 225     '%Y/%m/%d %H:%M',
 226     '%Y/%m/%d %H:%M:%S',
 227     '%Y%m%d%H%M',
 228     '%Y%m%d%H%M%S',
 229     '%Y%m%d',
 230     '%Y-%m-%d %H:%M',
 231     '%Y-%m-%d %H:%M:%S',
 232     '%Y-%m-%d %H:%M:%S.%f',
 233     '%Y-%m-%d %H:%M:%S:%f',
 234     '%d.%m.%Y %H:%M',
 235     '%d.%m.%Y %H.%M',
 236     '%Y-%m-%dT%H:%M:%SZ',
 237     '%Y-%m-%dT%H:%M:%S.%fZ',
 238     '%Y-%m-%dT%H:%M:%S.%f0Z',
 239     '%Y-%m-%dT%H:%M:%S',
 240     '%Y-%m-%dT%H:%M:%S.%f',
 241     '%Y-%m-%dT%H:%M',
 242     '%b %d %Y at %H:%M',
 243     '%b %d %Y at %H:%M:%S',
 244     '%B %d %Y at %H:%M',
 245     '%B %d %Y at %H:%M:%S',
 246     '%H:%M %d-%b-%Y',
 247 )
 248
 249 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 250 DATE_FORMATS_DAY_FIRST.extend([
 251     '%d-%m-%Y',
 252     '%d.%m.%Y',
 253     '%d.%m.%y',
 254     '%d/%m/%Y',
 255     '%d/%m/%y',
 256     '%d/%m/%Y %H:%M:%S',
 257 ])
 258
 259 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 260 DATE_FORMATS_MONTH_FIRST.extend([
 261     '%m-%d-%Y',
 262     '%m.%d.%Y',
 263     '%m/%d/%Y',
 264     '%m/%d/%y',
 265     '%m/%d/%Y %H:%M:%S',
 266 ])
 267
 268 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 269 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
 270
 271
 272 def preferredencoding():
 273     """Get preferred encoding.
 274
 275     Returns the best encoding scheme for the system, based on
 276     locale.getpreferredencoding() and some further tweaks.
 277     """
 278     try:
 279         pref = locale.getpreferredencoding()
 280         'TEST'.encode(pref)
 281     except Exception:
 282         pref = 'UTF-8'
 283
 284     return pref
 285
 286
 287 def write_json_file(obj, fn):
 288     """ Encode obj as JSON and write it to fn, atomically if possible """
 289
 290     tf = tempfile.NamedTemporaryFile(
 291         prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 292         suffix='.tmp', delete=False, mode='w', encoding='utf-8')
 293
 294     try:
 295         with tf:
 296             json.dump(obj, tf, ensure_ascii=False)
 297         if sys.platform == 'win32':
 298             # Need to remove existing file on Windows, else os.rename raises
 299             # WindowsError or FileExistsError.
 300             try:
 301                 os.unlink(fn)
 302             except OSError:
 303                 pass
 304         try:
 305             mask = os.umask(0)
 306             os.umask(mask)
 307             os.chmod(tf.name, 0o666 & ~mask)
 308         except OSError:
 309             pass
 310         os.rename(tf.name, fn)
 311     except Exception:
 312         try:
 313             os.remove(tf.name)
 314         except OSError:
 315             pass
 316         raise
 317
 318
 319 def find_xpath_attr(node, xpath, key, val=None):
 320     """ Find the xpath xpath[@key=val] """
 321     assert re.match(r'^[a-zA-Z_-]+$', key)
 322     expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
 323     return node.find(expr)
 324
 325 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 326 # the namespace parameter
 327
 328
 329 def xpath_with_ns(path, ns_map):
 330     components = [c.split(':') for c in path.split('/')]
 331     replaced = []
 332     for c in components:
 333         if len(c) == 1:
 334             replaced.append(c[0])
 335         else:
 336             ns, tag = c
 337             replaced.append('{%s}%s' % (ns_map[ns], tag))
 338     return '/'.join(replaced)
 339
 340
 341 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 342     def _find_xpath(xpath):
 343         return node.find(compat_xpath(xpath))
 344
 345     if isinstance(xpath, (str, compat_str)):
 346         n = _find_xpath(xpath)
 347     else:
 348         for xp in xpath:
 349             n = _find_xpath(xp)
 350             if n is not None:
 351                 break
 352
 353     if n is None:
 354         if default is not NO_DEFAULT:
 355             return default
 356         elif fatal:
 357             name = xpath if name is None else name
 358             raise ExtractorError('Could not find XML element %s' % name)
 359         else:
 360             return None
 361     return n
 362
 363
 364 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 365     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 366     if n is None or n == default:
 367         return n
 368     if n.text is None:
 369         if default is not NO_DEFAULT:
 370             return default
 371         elif fatal:
 372             name = xpath if name is None else name
 373             raise ExtractorError('Could not find XML element\'s text %s' % name)
 374         else:
 375             return None
 376     return n.text
 377
 378
 379 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 380     n = find_xpath_attr(node, xpath, key)
 381     if n is None:
 382         if default is not NO_DEFAULT:
 383             return default
 384         elif fatal:
 385             name = '%s[@%s]' % (xpath, key) if name is None else name
 386             raise ExtractorError('Could not find XML attribute %s' % name)
 387         else:
 388             return None
 389     return n.attrib[key]
 390
 391
 392 def get_element_by_id(id, html):
 393     """Return the content of the tag with the specified ID in the passed HTML document"""
 394     return get_element_by_attribute('id', id, html)
 395
 396
 397 def get_element_html_by_id(id, html):
 398     """Return the html of the tag with the specified ID in the passed HTML document"""
 399     return get_element_html_by_attribute('id', id, html)
 400
 401
 402 def get_element_by_class(class_name, html):
 403     """Return the content of the first tag with the specified class in the passed HTML document"""
 404     retval = get_elements_by_class(class_name, html)
 405     return retval[0] if retval else None
 406
 407
 408 def get_element_html_by_class(class_name, html):
 409     """Return the html of the first tag with the specified class in the passed HTML document"""
 410     retval = get_elements_html_by_class(class_name, html)
 411     return retval[0] if retval else None
 412
 413
 414 def get_element_by_attribute(attribute, value, html, escape_value=True):
 415     retval = get_elements_by_attribute(attribute, value, html, escape_value)
 416     return retval[0] if retval else None
 417
 418
 419 def get_element_html_by_attribute(attribute, value, html, escape_value=True):
 420     retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
 421     return retval[0] if retval else None
 422
 423
 424 def get_elements_by_class(class_name, html):
 425     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 426     return get_elements_by_attribute(
 427         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 428         html, escape_value=False)
 429
 430
 431 def get_elements_html_by_class(class_name, html):
 432     """Return the html of all tags with the specified class in the passed HTML document as a list"""
 433     return get_elements_html_by_attribute(
 434         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 435         html, escape_value=False)
 436
 437
 438 def get_elements_by_attribute(*args, **kwargs):
 439     """Return the content of the tag with the specified attribute in the passed HTML document"""
 440     return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 441
 442
 443 def get_elements_html_by_attribute(*args, **kwargs):
 444     """Return the html of the tag with the specified attribute in the passed HTML document"""
 445     return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 446
 447
 448 def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
 449     """
 450     Return the text (content) and the html (whole) of the tag with the specified
 451     attribute in the passed HTML document
 452     """
 453
 454     value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
 455
 456     value = re.escape(value) if escape_value else value
 457
 458     partial_element_re = r'''(?x)
 459         <(?P<tag>[a-zA-Z0-9:._-]+)
 460          (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
 461          \s%(attribute)s\s*=\s*(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q)
 462         ''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional}
 463
 464     for m in re.finditer(partial_element_re, html):
 465         content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
 466
 467         yield (
 468             unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
 469             whole
 470         )
 471
 472
 473 class HTMLBreakOnClosingTagParser(compat_HTMLParser):
 474     """
 475     HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
 476     closing tag for the first opening tag it has encountered, and can be used
 477     as a context manager
 478     """
 479
 480     class HTMLBreakOnClosingTagException(Exception):
 481         pass
 482
 483     def __init__(self):
 484         self.tagstack = collections.deque()
 485         compat_HTMLParser.__init__(self)
 486
 487     def __enter__(self):
 488         return self
 489
 490     def __exit__(self, *_):
 491         self.close()
 492
 493     def close(self):
 494         # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
 495         # so data remains buffered; we no longer have any interest in it, thus
 496         # override this method to discard it
 497         pass
 498
 499     def handle_starttag(self, tag, _):
 500         self.tagstack.append(tag)
 501
 502     def handle_endtag(self, tag):
 503         if not self.tagstack:
 504             raise compat_HTMLParseError('no tags in the stack')
 505         while self.tagstack:
 506             inner_tag = self.tagstack.pop()
 507             if inner_tag == tag:
 508                 break
 509         else:
 510             raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
 511         if not self.tagstack:
 512             raise self.HTMLBreakOnClosingTagException()
 513
 514
 515 def get_element_text_and_html_by_tag(tag, html):
 516     """
 517     For the first element with the specified tag in the passed HTML document
 518     return its' content (text) and the whole element (html)
 519     """
 520     def find_or_raise(haystack, needle, exc):
 521         try:
 522             return haystack.index(needle)
 523         except ValueError:
 524             raise exc
 525     closing_tag = f'</{tag}>'
 526     whole_start = find_or_raise(
 527         html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
 528     content_start = find_or_raise(
 529         html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
 530     content_start += whole_start + 1
 531     with HTMLBreakOnClosingTagParser() as parser:
 532         parser.feed(html[whole_start:content_start])
 533         if not parser.tagstack or parser.tagstack[0] != tag:
 534             raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
 535         offset = content_start
 536         while offset < len(html):
 537             next_closing_tag_start = find_or_raise(
 538                 html[offset:], closing_tag,
 539                 compat_HTMLParseError(f'closing {tag} tag not found'))
 540             next_closing_tag_end = next_closing_tag_start + len(closing_tag)
 541             try:
 542                 parser.feed(html[offset:offset + next_closing_tag_end])
 543                 offset += next_closing_tag_end
 544             except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
 545                 return html[content_start:offset + next_closing_tag_start], \
 546                     html[whole_start:offset + next_closing_tag_end]
 547         raise compat_HTMLParseError('unexpected end of html')
 548
 549
 550 class HTMLAttributeParser(compat_HTMLParser):
 551     """Trivial HTML parser to gather the attributes for a single element"""
 552
 553     def __init__(self):
 554         self.attrs = {}
 555         compat_HTMLParser.__init__(self)
 556
 557     def handle_starttag(self, tag, attrs):
 558         self.attrs = dict(attrs)
 559
 560
 561 class HTMLListAttrsParser(compat_HTMLParser):
 562     """HTML parser to gather the attributes for the elements of a list"""
 563
 564     def __init__(self):
 565         compat_HTMLParser.__init__(self)
 566         self.items = []
 567         self._level = 0
 568
 569     def handle_starttag(self, tag, attrs):
 570         if tag == 'li' and self._level == 0:
 571             self.items.append(dict(attrs))
 572         self._level += 1
 573
 574     def handle_endtag(self, tag):
 575         self._level -= 1
 576
 577
 578 def extract_attributes(html_element):
 579     """Given a string for an HTML element such as
 580     <el
 581          a="foo" B="bar" c="&98;az" d=boz
 582          empty= noval entity="&amp;"
 583          sq='"' dq="'"
 584     >
 585     Decode and return a dictionary of attributes.
 586     {
 587         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 588         'empty': '', 'noval': None, 'entity': '&',
 589         'sq': '"', 'dq': '\''
 590     }.
 591     """
 592     parser = HTMLAttributeParser()
 593     try:
 594         parser.feed(html_element)
 595         parser.close()
 596     # Older Python may throw HTMLParseError in case of malformed HTML
 597     except compat_HTMLParseError:
 598         pass
 599     return parser.attrs
 600
 601
 602 def parse_list(webpage):
 603     """Given a string for an series of HTML <li> elements,
 604     return a dictionary of their attributes"""
 605     parser = HTMLListAttrsParser()
 606     parser.feed(webpage)
 607     parser.close()
 608     return parser.items
 609
 610
 611 def clean_html(html):
 612     """Clean an HTML snippet into a readable string"""
 613
 614     if html is None:  # Convenience for sanitizing descriptions etc.
 615         return html
 616
 617     html = re.sub(r'\s+', ' ', html)
 618     html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
 619     html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
 620     # Strip html tags
 621     html = re.sub('<.*?>', '', html)
 622     # Replace html entities
 623     html = unescapeHTML(html)
 624     return html.strip()
 625
 626
 627 def sanitize_open(filename, open_mode):
 628     """Try to open the given filename, and slightly tweak it if this fails.
 629
 630     Attempts to open the given filename. If this fails, it tries to change
 631     the filename slightly, step by step, until it's either able to open it
 632     or it fails and raises a final exception, like the standard open()
 633     function.
 634
 635     It returns the tuple (stream, definitive_file_name).
 636     """
 637     if filename == '-':
 638         if sys.platform == 'win32':
 639             import msvcrt
 640             msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 641         return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 642
 643     for attempt in range(2):
 644         try:
 645             try:
 646                 if sys.platform == 'win32':
 647                     # FIXME: An exclusive lock also locks the file from being read.
 648                     # Since windows locks are mandatory, don't lock the file on windows (for now).
 649                     # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
 650                     raise LockingUnsupportedError()
 651                 stream = locked_file(filename, open_mode, block=False).__enter__()
 652             except LockingUnsupportedError:
 653                 stream = open(filename, open_mode)
 654             return (stream, filename)
 655         except (IOError, OSError) as err:
 656             if attempt or err.errno in (errno.EACCES,):
 657                 raise
 658             old_filename, filename = filename, sanitize_path(filename)
 659             if old_filename == filename:
 660                 raise
 661
 662
 663 def timeconvert(timestr):
 664     """Convert RFC 2822 defined time string into system timestamp"""
 665     timestamp = None
 666     timetuple = email.utils.parsedate_tz(timestr)
 667     if timetuple is not None:
 668         timestamp = email.utils.mktime_tz(timetuple)
 669     return timestamp
 670
 671
 672 def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
 673     """Sanitizes a string so it could be used as part of a filename.
 674     @param restricted   Use a stricter subset of allowed characters
 675     @param is_id        Whether this is an ID that should be kept unchanged if possible.
 676                         If unset, yt-dlp's new sanitization rules are in effect
 677     """
 678     if s == '':
 679         return ''
 680
 681     def replace_insane(char):
 682         if restricted and char in ACCENT_CHARS:
 683             return ACCENT_CHARS[char]
 684         elif not restricted and char == '\n':
 685             return '\0 '
 686         elif char == '?' or ord(char) < 32 or ord(char) == 127:
 687             return ''
 688         elif char == '"':
 689             return '' if restricted else '\''
 690         elif char == ':':
 691             return '\0_\0-' if restricted else '\0 \0-'
 692         elif char in '\\/|*<>':
 693             return '\0_'
 694         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
 695             return '\0_'
 696         return char
 697
 698     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
 699     result = ''.join(map(replace_insane, s))
 700     if is_id is NO_DEFAULT:
 701         result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
 702         STRIP_RE = '(?:\0.|[ _-])*'
 703         result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
 704     result = result.replace('\0', '') or '_'
 705
 706     if not is_id:
 707         while '__' in result:
 708             result = result.replace('__', '_')
 709         result = result.strip('_')
 710         # Common case of "Foreign band name - English song title"
 711         if restricted and result.startswith('-_'):
 712             result = result[2:]
 713         if result.startswith('-'):
 714             result = '_' + result[len('-'):]
 715         result = result.lstrip('.')
 716         if not result:
 717             result = '_'
 718     return result
 719
 720
 721 def sanitize_path(s, force=False):
 722     """Sanitizes and normalizes path on Windows"""
 723     if sys.platform == 'win32':
 724         force = False
 725         drive_or_unc, _ = os.path.splitdrive(s)
 726     elif force:
 727         drive_or_unc = ''
 728     else:
 729         return s
 730
 731     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 732     if drive_or_unc:
 733         norm_path.pop(0)
 734     sanitized_path = [
 735         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 736         for path_part in norm_path]
 737     if drive_or_unc:
 738         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 739     elif force and s and s[0] == os.path.sep:
 740         sanitized_path.insert(0, os.path.sep)
 741     return os.path.join(*sanitized_path)
 742
 743
 744 def sanitize_url(url):
 745     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 746     # the number of unwanted failures due to missing protocol
 747     if url.startswith('//'):
 748         return 'http:%s' % url
 749     # Fix some common typos seen so far
 750     COMMON_TYPOS = (
 751         # https://github.com/ytdl-org/youtube-dl/issues/15649
 752         (r'^httpss://', r'https://'),
 753         # https://bx1.be/lives/direct-tv/
 754         (r'^rmtp([es]?)://', r'rtmp\1://'),
 755     )
 756     for mistake, fixup in COMMON_TYPOS:
 757         if re.match(mistake, url):
 758             return re.sub(mistake, fixup, url)
 759     return url
 760
 761
 762 def extract_basic_auth(url):
 763     parts = compat_urlparse.urlsplit(url)
 764     if parts.username is None:
 765         return url, None
 766     url = compat_urlparse.urlunsplit(parts._replace(netloc=(
 767         parts.hostname if parts.port is None
 768         else '%s:%d' % (parts.hostname, parts.port))))
 769     auth_payload = base64.b64encode(
 770         ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
 771     return url, 'Basic ' + auth_payload.decode('utf-8')
 772
 773
 774 def sanitized_Request(url, *args, **kwargs):
 775     url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
 776     if auth_header is not None:
 777         headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
 778         headers['Authorization'] = auth_header
 779     return compat_urllib_request.Request(url, *args, **kwargs)
 780
 781
 782 def expand_path(s):
 783     """Expand shell variables and ~"""
 784     return os.path.expandvars(compat_expanduser(s))
 785
 786
 787 def orderedSet(iterable):
 788     """ Remove all duplicates from the input iterable """
 789     res = []
 790     for el in iterable:
 791         if el not in res:
 792             res.append(el)
 793     return res
 794
 795
 796 def _htmlentity_transform(entity_with_semicolon):
 797     """Transforms an HTML entity to a character."""
 798     entity = entity_with_semicolon[:-1]
 799
 800     # Known non-numeric HTML entity
 801     if entity in compat_html_entities.name2codepoint:
 802         return compat_chr(compat_html_entities.name2codepoint[entity])
 803
 804     # TODO: HTML5 allows entities without a semicolon. For example,
 805     # '&Eacuteric' should be decoded as 'Éric'.
 806     if entity_with_semicolon in compat_html_entities_html5:
 807         return compat_html_entities_html5[entity_with_semicolon]
 808
 809     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 810     if mobj is not None:
 811         numstr = mobj.group(1)
 812         if numstr.startswith('x'):
 813             base = 16
 814             numstr = '0%s' % numstr
 815         else:
 816             base = 10
 817         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 818         try:
 819             return compat_chr(int(numstr, base))
 820         except ValueError:
 821             pass
 822
 823     # Unknown entity in name, return its literal representation
 824     return '&%s;' % entity
 825
 826
 827 def unescapeHTML(s):
 828     if s is None:
 829         return None
 830     assert type(s) == compat_str
 831
 832     return re.sub(
 833         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 834
 835
 836 def escapeHTML(text):
 837     return (
 838         text
 839         .replace('&', '&amp;')
 840         .replace('<', '&lt;')
 841         .replace('>', '&gt;')
 842         .replace('"', '&quot;')
 843         .replace("'", '&#39;')
 844     )
 845
 846
 847 def process_communicate_or_kill(p, *args, **kwargs):
 848     try:
 849         return p.communicate(*args, **kwargs)
 850     except BaseException:  # Including KeyboardInterrupt
 851         p.kill()
 852         p.wait()
 853         raise
 854
 855
 856 class Popen(subprocess.Popen):
 857     if sys.platform == 'win32':
 858         _startupinfo = subprocess.STARTUPINFO()
 859         _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 860     else:
 861         _startupinfo = None
 862
 863     def __init__(self, *args, **kwargs):
 864         super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
 865
 866     def communicate_or_kill(self, *args, **kwargs):
 867         return process_communicate_or_kill(self, *args, **kwargs)
 868
 869
 870 def get_subprocess_encoding():
 871     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 872         # For subprocess calls, encode with locale encoding
 873         # Refer to http://stackoverflow.com/a/9951851/35070
 874         encoding = preferredencoding()
 875     else:
 876         encoding = sys.getfilesystemencoding()
 877     if encoding is None:
 878         encoding = 'utf-8'
 879     return encoding
 880
 881
 882 def encodeFilename(s, for_subprocess=False):
 883     assert type(s) == str
 884     return s
 885
 886
 887 def decodeFilename(b, for_subprocess=False):
 888     return b
 889
 890
 891 def encodeArgument(s):
 892     # Legacy code that uses byte strings
 893     # Uncomment the following line after fixing all post processors
 894     # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 895     return s if isinstance(s, str) else s.decode('ascii')
 896
 897
 898 def decodeArgument(b):
 899     return b
 900
 901
 902 def decodeOption(optval):
 903     if optval is None:
 904         return optval
 905     if isinstance(optval, bytes):
 906         optval = optval.decode(preferredencoding())
 907
 908     assert isinstance(optval, compat_str)
 909     return optval
 910
 911
 912 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 913
 914
 915 def timetuple_from_msec(msec):
 916     secs, msec = divmod(msec, 1000)
 917     mins, secs = divmod(secs, 60)
 918     hrs, mins = divmod(mins, 60)
 919     return _timetuple(hrs, mins, secs, msec)
 920
 921
 922 def formatSeconds(secs, delim=':', msec=False):
 923     time = timetuple_from_msec(secs * 1000)
 924     if time.hours:
 925         ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 926     elif time.minutes:
 927         ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
 928     else:
 929         ret = '%d' % time.seconds
 930     return '%s.%03d' % (ret, time.milliseconds) if msec else ret
 931
 932
 933 def _ssl_load_windows_store_certs(ssl_context, storename):
 934     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
 935     try:
 936         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
 937                  if encoding == 'x509_asn' and (
 938                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
 939     except PermissionError:
 940         return
 941     for cert in certs:
 942         try:
 943             ssl_context.load_verify_locations(cadata=cert)
 944         except ssl.SSLError:
 945             pass
 946
 947
 948 def make_HTTPS_handler(params, **kwargs):
 949     opts_check_certificate = not params.get('nocheckcertificate')
 950     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 951     context.check_hostname = opts_check_certificate
 952     if params.get('legacyserverconnect'):
 953         context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
 954     context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
 955     if opts_check_certificate:
 956         if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
 957             context.load_verify_locations(cafile=certifi.where())
 958         else:
 959             try:
 960                 context.load_default_certs()
 961                 # Work around the issue in load_default_certs when there are bad certificates. See:
 962                 # https://github.com/yt-dlp/yt-dlp/issues/1060,
 963                 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
 964             except ssl.SSLError:
 965                 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
 966                 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
 967                     # Create a new context to discard any certificates that were already loaded
 968                     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 969                     context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
 970                     for storename in ('CA', 'ROOT'):
 971                         _ssl_load_windows_store_certs(context, storename)
 972                 context.set_default_verify_paths()
 973     return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 974
 975
 976 def bug_reports_message(before=';'):
 977     msg = ('please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , '
 978            'filling out the appropriate issue template. '
 979            'Confirm you are on the latest version using  yt-dlp -U')
 980
 981     before = before.rstrip()
 982     if not before or before.endswith(('.', '!', '?')):
 983         msg = msg[0].title() + msg[1:]
 984
 985     return (before + ' ' if before else '') + msg
 986
 987
 988 class YoutubeDLError(Exception):
 989     """Base exception for YoutubeDL errors."""
 990     msg = None
 991
 992     def __init__(self, msg=None):
 993         if msg is not None:
 994             self.msg = msg
 995         elif self.msg is None:
 996             self.msg = type(self).__name__
 997         super().__init__(self.msg)
 998
 999
1000 network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
1001 if hasattr(ssl, 'CertificateError'):
1002     network_exceptions.append(ssl.CertificateError)
1003 network_exceptions = tuple(network_exceptions)
1004
1005
1006 class ExtractorError(YoutubeDLError):
1007     """Error during info extraction."""
1008
1009     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
1010         """ tb, if given, is the original traceback (so that it can be printed out).
1011         If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1012         """
1013         if sys.exc_info()[0] in network_exceptions:
1014             expected = True
1015
1016         self.orig_msg = str(msg)
1017         self.traceback = tb
1018         self.expected = expected
1019         self.cause = cause
1020         self.video_id = video_id
1021         self.ie = ie
1022         self.exc_info = sys.exc_info()  # preserve original exception
1023
1024         super(ExtractorError, self).__init__(''.join((
1025             format_field(ie, template='[%s] '),
1026             format_field(video_id, template='%s: '),
1027             msg,
1028             format_field(cause, template=' (caused by %r)'),
1029             '' if expected else bug_reports_message())))
1030
1031     def format_traceback(self):
1032         return join_nonempty(
1033             self.traceback and ''.join(traceback.format_tb(self.traceback)),
1034             self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1035             delim='\n') or None
1036
1037
1038 class UnsupportedError(ExtractorError):
1039     def __init__(self, url):
1040         super(UnsupportedError, self).__init__(
1041             'Unsupported URL: %s' % url, expected=True)
1042         self.url = url
1043
1044
1045 class RegexNotFoundError(ExtractorError):
1046     """Error when a regex didn't match"""
1047     pass
1048
1049
1050 class GeoRestrictedError(ExtractorError):
1051     """Geographic restriction Error exception.
1052
1053     This exception may be thrown when a video is not available from your
1054     geographic location due to geographic restrictions imposed by a website.
1055     """
1056
1057     def __init__(self, msg, countries=None, **kwargs):
1058         kwargs['expected'] = True
1059         super(GeoRestrictedError, self).__init__(msg, **kwargs)
1060         self.countries = countries
1061
1062
1063 class DownloadError(YoutubeDLError):
1064     """Download Error exception.
1065
1066     This exception may be thrown by FileDownloader objects if they are not
1067     configured to continue on errors. They will contain the appropriate
1068     error message.
1069     """
1070
1071     def __init__(self, msg, exc_info=None):
1072         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1073         super(DownloadError, self).__init__(msg)
1074         self.exc_info = exc_info
1075
1076
1077 class EntryNotInPlaylist(YoutubeDLError):
1078     """Entry not in playlist exception.
1079
1080     This exception will be thrown by YoutubeDL when a requested entry
1081     is not found in the playlist info_dict
1082     """
1083     msg = 'Entry not found in info'
1084
1085
1086 class SameFileError(YoutubeDLError):
1087     """Same File exception.
1088
1089     This exception will be thrown by FileDownloader objects if they detect
1090     multiple files would have to be downloaded to the same file on disk.
1091     """
1092     msg = 'Fixed output name but more than one file to download'
1093
1094     def __init__(self, filename=None):
1095         if filename is not None:
1096             self.msg += f': {filename}'
1097         super().__init__(self.msg)
1098
1099
1100 class PostProcessingError(YoutubeDLError):
1101     """Post Processing exception.
1102
1103     This exception may be raised by PostProcessor's .run() method to
1104     indicate an error in the postprocessing task.
1105     """
1106
1107
1108 class DownloadCancelled(YoutubeDLError):
1109     """ Exception raised when the download queue should be interrupted """
1110     msg = 'The download was cancelled'
1111
1112
1113 class ExistingVideoReached(DownloadCancelled):
1114     """ --break-on-existing triggered """
1115     msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1116
1117
1118 class RejectedVideoReached(DownloadCancelled):
1119     """ --break-on-reject triggered """
1120     msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1121
1122
1123 class MaxDownloadsReached(DownloadCancelled):
1124     """ --max-downloads limit has been reached. """
1125     msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1126
1127
1128 class ReExtractInfo(YoutubeDLError):
1129     """ Video info needs to be re-extracted. """
1130
1131     def __init__(self, msg, expected=False):
1132         super().__init__(msg)
1133         self.expected = expected
1134
1135
1136 class ThrottledDownload(ReExtractInfo):
1137     """ Download speed below --throttled-rate. """
1138     msg = 'The download speed is below throttle limit'
1139
1140     def __init__(self):
1141         super().__init__(self.msg, expected=False)
1142
1143
1144 class UnavailableVideoError(YoutubeDLError):
1145     """Unavailable Format exception.
1146
1147     This exception will be thrown when a video is requested
1148     in a format that is not available for that video.
1149     """
1150     msg = 'Unable to download video'
1151
1152     def __init__(self, err=None):
1153         if err is not None:
1154             self.msg += f': {err}'
1155         super().__init__(self.msg)
1156
1157
1158 class ContentTooShortError(YoutubeDLError):
1159     """Content Too Short exception.
1160
1161     This exception may be raised by FileDownloader objects when a file they
1162     download is too small for what the server announced first, indicating
1163     the connection was probably interrupted.
1164     """
1165
1166     def __init__(self, downloaded, expected):
1167         super(ContentTooShortError, self).__init__(
1168             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
1169         )
1170         # Both in bytes
1171         self.downloaded = downloaded
1172         self.expected = expected
1173
1174
1175 class XAttrMetadataError(YoutubeDLError):
1176     def __init__(self, code=None, msg='Unknown error'):
1177         super(XAttrMetadataError, self).__init__(msg)
1178         self.code = code
1179         self.msg = msg
1180
1181         # Parsing code and msg
1182         if (self.code in (errno.ENOSPC, errno.EDQUOT)
1183                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
1184             self.reason = 'NO_SPACE'
1185         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
1186             self.reason = 'VALUE_TOO_LONG'
1187         else:
1188             self.reason = 'NOT_SUPPORTED'
1189
1190
1191 class XAttrUnavailableError(YoutubeDLError):
1192     pass
1193
1194
1195 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1196     hc = http_class(*args, **compat_kwargs(kwargs))
1197     source_address = ydl_handler._params.get('source_address')
1198
1199     if source_address is not None:
1200         # This is to workaround _create_connection() from socket where it will try all
1201         # address data from getaddrinfo() including IPv6. This filters the result from
1202         # getaddrinfo() based on the source_address value.
1203         # This is based on the cpython socket.create_connection() function.
1204         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1205         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1206             host, port = address
1207             err = None
1208             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1209             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1210             ip_addrs = [addr for addr in addrs if addr[0] == af]
1211             if addrs and not ip_addrs:
1212                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1213                 raise socket.error(
1214                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
1215                     % (ip_version, source_address[0]))
1216             for res in ip_addrs:
1217                 af, socktype, proto, canonname, sa = res
1218                 sock = None
1219                 try:
1220                     sock = socket.socket(af, socktype, proto)
1221                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1222                         sock.settimeout(timeout)
1223                     sock.bind(source_address)
1224                     sock.connect(sa)
1225                     err = None  # Explicitly break reference cycle
1226                     return sock
1227                 except socket.error as _:
1228                     err = _
1229                     if sock is not None:
1230                         sock.close()
1231             if err is not None:
1232                 raise err
1233             else:
1234                 raise socket.error('getaddrinfo returns an empty list')
1235         if hasattr(hc, '_create_connection'):
1236             hc._create_connection = _create_connection
1237         hc.source_address = (source_address, 0)
1238
1239     return hc
1240
1241
1242 def handle_youtubedl_headers(headers):
1243     filtered_headers = headers
1244
1245     if 'Youtubedl-no-compression' in filtered_headers:
1246         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
1247         del filtered_headers['Youtubedl-no-compression']
1248
1249     return filtered_headers
1250
1251
1252 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
1253     """Handler for HTTP requests and responses.
1254
1255     This class, when installed with an OpenerDirector, automatically adds
1256     the standard headers to every HTTP request and handles gzipped and
1257     deflated responses from web servers. If compression is to be avoided in
1258     a particular request, the original request in the program code only has
1259     to include the HTTP header "Youtubedl-no-compression", which will be
1260     removed before making the real request.
1261
1262     Part of this code was copied from:
1263
1264     http://techknack.net/python-urllib2-handlers/
1265
1266     Andrew Rowls, the author of that code, agreed to release it to the
1267     public domain.
1268     """
1269
1270     def __init__(self, params, *args, **kwargs):
1271         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
1272         self._params = params
1273
1274     def http_open(self, req):
1275         conn_class = compat_http_client.HTTPConnection
1276
1277         socks_proxy = req.headers.get('Ytdl-socks-proxy')
1278         if socks_proxy:
1279             conn_class = make_socks_conn_class(conn_class, socks_proxy)
1280             del req.headers['Ytdl-socks-proxy']
1281
1282         return self.do_open(functools.partial(
1283             _create_http_connection, self, conn_class, False),
1284             req)
1285
1286     @staticmethod
1287     def deflate(data):
1288         if not data:
1289             return data
1290         try:
1291             return zlib.decompress(data, -zlib.MAX_WBITS)
1292         except zlib.error:
1293             return zlib.decompress(data)
1294
1295     @staticmethod
1296     def brotli(data):
1297         if not data:
1298             return data
1299         return compat_brotli.decompress(data)
1300
1301     def http_request(self, req):
1302         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1303         # always respected by websites, some tend to give out URLs with non percent-encoded
1304         # non-ASCII characters (see telemb.py, ard.py [#3412])
1305         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1306         # To work around aforementioned issue we will replace request's original URL with
1307         # percent-encoded one
1308         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1309         # the code of this workaround has been moved here from YoutubeDL.urlopen()
1310         url = req.get_full_url()
1311         url_escaped = escape_url(url)
1312
1313         # Substitute URL if any change after escaping
1314         if url != url_escaped:
1315             req = update_Request(req, url=url_escaped)
1316
1317         for h, v in self._params.get('http_headers', std_headers).items():
1318             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
1319             # The dict keys are capitalized because of this bug by urllib
1320             if h.capitalize() not in req.headers:
1321                 req.add_header(h, v)
1322
1323         if 'Accept-encoding' not in req.headers:
1324             req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
1325
1326         req.headers = handle_youtubedl_headers(req.headers)
1327
1328         return req
1329
1330     def http_response(self, req, resp):
1331         old_resp = resp
1332         # gzip
1333         if resp.headers.get('Content-encoding', '') == 'gzip':
1334             content = resp.read()
1335             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
1336             try:
1337                 uncompressed = io.BytesIO(gz.read())
1338             except IOError as original_ioerror:
1339                 # There may be junk add the end of the file
1340                 # See http://stackoverflow.com/q/4928560/35070 for details
1341                 for i in range(1, 1024):
1342                     try:
1343                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
1344                         uncompressed = io.BytesIO(gz.read())
1345                     except IOError:
1346                         continue
1347                     break
1348                 else:
1349                     raise original_ioerror
1350             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
1351             resp.msg = old_resp.msg
1352             del resp.headers['Content-encoding']
1353         # deflate
1354         if resp.headers.get('Content-encoding', '') == 'deflate':
1355             gz = io.BytesIO(self.deflate(resp.read()))
1356             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
1357             resp.msg = old_resp.msg
1358             del resp.headers['Content-encoding']
1359         # brotli
1360         if resp.headers.get('Content-encoding', '') == 'br':
1361             resp = compat_urllib_request.addinfourl(
1362                 io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
1363             resp.msg = old_resp.msg
1364             del resp.headers['Content-encoding']
1365         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1366         # https://github.com/ytdl-org/youtube-dl/issues/6457).
1367         if 300 <= resp.code < 400:
1368             location = resp.headers.get('Location')
1369             if location:
1370                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
1371                 location = location.encode('iso-8859-1').decode('utf-8')
1372                 location_escaped = escape_url(location)
1373                 if location != location_escaped:
1374                     del resp.headers['Location']
1375                     resp.headers['Location'] = location_escaped
1376         return resp
1377
1378     https_request = http_request
1379     https_response = http_response
1380
1381
1382 def make_socks_conn_class(base_class, socks_proxy):
1383     assert issubclass(base_class, (
1384         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
1385
1386     url_components = compat_urlparse.urlparse(socks_proxy)
1387     if url_components.scheme.lower() == 'socks5':
1388         socks_type = ProxyType.SOCKS5
1389     elif url_components.scheme.lower() in ('socks', 'socks4'):
1390         socks_type = ProxyType.SOCKS4
1391     elif url_components.scheme.lower() == 'socks4a':
1392         socks_type = ProxyType.SOCKS4A
1393
1394     def unquote_if_non_empty(s):
1395         if not s:
1396             return s
1397         return compat_urllib_parse_unquote_plus(s)
1398
1399     proxy_args = (
1400         socks_type,
1401         url_components.hostname, url_components.port or 1080,
1402         True,  # Remote DNS
1403         unquote_if_non_empty(url_components.username),
1404         unquote_if_non_empty(url_components.password),
1405     )
1406
1407     class SocksConnection(base_class):
1408         def connect(self):
1409             self.sock = sockssocket()
1410             self.sock.setproxy(*proxy_args)
1411             if type(self.timeout) in (int, float):
1412                 self.sock.settimeout(self.timeout)
1413             self.sock.connect((self.host, self.port))
1414
1415             if isinstance(self, compat_http_client.HTTPSConnection):
1416                 if hasattr(self, '_context'):  # Python > 2.6
1417                     self.sock = self._context.wrap_socket(
1418                         self.sock, server_hostname=self.host)
1419                 else:
1420                     self.sock = ssl.wrap_socket(self.sock)
1421
1422     return SocksConnection
1423
1424
1425 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
1426     def __init__(self, params, https_conn_class=None, *args, **kwargs):
1427         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
1428         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
1429         self._params = params
1430
1431     def https_open(self, req):
1432         kwargs = {}
1433         conn_class = self._https_conn_class
1434
1435         if hasattr(self, '_context'):  # python > 2.6
1436             kwargs['context'] = self._context
1437         if hasattr(self, '_check_hostname'):  # python 3.x
1438             kwargs['check_hostname'] = self._check_hostname
1439
1440         socks_proxy = req.headers.get('Ytdl-socks-proxy')
1441         if socks_proxy:
1442             conn_class = make_socks_conn_class(conn_class, socks_proxy)
1443             del req.headers['Ytdl-socks-proxy']
1444
1445         return self.do_open(functools.partial(
1446             _create_http_connection, self, conn_class, True),
1447             req, **kwargs)
1448
1449
1450 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
1451     """
1452     See [1] for cookie file format.
1453
1454     1. https://curl.haxx.se/docs/http-cookies.html
1455     """
1456     _HTTPONLY_PREFIX = '#HttpOnly_'
1457     _ENTRY_LEN = 7
1458     _HEADER = '''# Netscape HTTP Cookie File
1459 # This file is generated by yt-dlp.  Do not edit.
1460
1461 '''
1462     _CookieFileEntry = collections.namedtuple(
1463         'CookieFileEntry',
1464         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1465
1466     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
1467         """
1468         Save cookies to a file.
1469
1470         Most of the code is taken from CPython 3.8 and slightly adapted
1471         to support cookie files with UTF-8 in both python 2 and 3.
1472         """
1473         if filename is None:
1474             if self.filename is not None:
1475                 filename = self.filename
1476             else:
1477                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
1478
1479         # Store session cookies with `expires` set to 0 instead of an empty
1480         # string
1481         for cookie in self:
1482             if cookie.expires is None:
1483                 cookie.expires = 0
1484
1485         with io.open(filename, 'w', encoding='utf-8') as f:
1486             f.write(self._HEADER)
1487             now = time.time()
1488             for cookie in self:
1489                 if not ignore_discard and cookie.discard:
1490                     continue
1491                 if not ignore_expires and cookie.is_expired(now):
1492                     continue
1493                 if cookie.secure:
1494                     secure = 'TRUE'
1495                 else:
1496                     secure = 'FALSE'
1497                 if cookie.domain.startswith('.'):
1498                     initial_dot = 'TRUE'
1499                 else:
1500                     initial_dot = 'FALSE'
1501                 if cookie.expires is not None:
1502                     expires = compat_str(cookie.expires)
1503                 else:
1504                     expires = ''
1505                 if cookie.value is None:
1506                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
1507                     # with no name, whereas http.cookiejar regards it as a
1508                     # cookie with no value.
1509                     name = ''
1510                     value = cookie.name
1511                 else:
1512                     name = cookie.name
1513                     value = cookie.value
1514                 f.write(
1515                     '\t'.join([cookie.domain, initial_dot, cookie.path,
1516                                secure, expires, name, value]) + '\n')
1517
1518     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1519         """Load cookies from a file."""
1520         if filename is None:
1521             if self.filename is not None:
1522                 filename = self.filename
1523             else:
1524                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
1525
1526         def prepare_line(line):
1527             if line.startswith(self._HTTPONLY_PREFIX):
1528                 line = line[len(self._HTTPONLY_PREFIX):]
1529             # comments and empty lines are fine
1530             if line.startswith('#') or not line.strip():
1531                 return line
1532             cookie_list = line.split('\t')
1533             if len(cookie_list) != self._ENTRY_LEN:
1534                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
1535             cookie = self._CookieFileEntry(*cookie_list)
1536             if cookie.expires_at and not cookie.expires_at.isdigit():
1537                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1538             return line
1539
1540         cf = io.StringIO()
1541         with io.open(filename, encoding='utf-8') as f:
1542             for line in f:
1543                 try:
1544                     cf.write(prepare_line(line))
1545                 except compat_cookiejar.LoadError as e:
1546                     write_string(
1547                         'WARNING: skipping cookie file entry due to %s: %r\n'
1548                         % (e, line), sys.stderr)
1549                     continue
1550         cf.seek(0)
1551         self._really_load(cf, filename, ignore_discard, ignore_expires)
1552         # Session cookies are denoted by either `expires` field set to
1553         # an empty string or 0. MozillaCookieJar only recognizes the former
1554         # (see [1]). So we need force the latter to be recognized as session
1555         # cookies on our own.
1556         # Session cookies may be important for cookies-based authentication,
1557         # e.g. usually, when user does not check 'Remember me' check box while
1558         # logging in on a site, some important cookies are stored as session
1559         # cookies so that not recognizing them will result in failed login.
1560         # 1. https://bugs.python.org/issue17164
1561         for cookie in self:
1562             # Treat `expires=0` cookies as session cookies
1563             if cookie.expires == 0:
1564                 cookie.expires = None
1565                 cookie.discard = True
1566
1567
1568 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
1569     def __init__(self, cookiejar=None):
1570         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
1571
1572     def http_response(self, request, response):
1573         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
1574
1575     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
1576     https_response = http_response
1577
1578
1579 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
1580     """YoutubeDL redirect handler
1581
1582     The code is based on HTTPRedirectHandler implementation from CPython [1].
1583
1584     This redirect handler solves two issues:
1585      - ensures redirect URL is always unicode under python 2
1586      - introduces support for experimental HTTP response status code
1587        308 Permanent Redirect [2] used by some sites [3]
1588
1589     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1590     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1591     3. https://github.com/ytdl-org/youtube-dl/issues/28768
1592     """
1593
1594     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
1595
1596     def redirect_request(self, req, fp, code, msg, headers, newurl):
1597         """Return a Request or None in response to a redirect.
1598
1599         This is called by the http_error_30x methods when a
1600         redirection response is received.  If a redirection should
1601         take place, return a new Request to allow http_error_30x to
1602         perform the redirect.  Otherwise, raise HTTPError if no-one
1603         else should try to handle this url.  Return None if you can't
1604         but another Handler might.
1605         """
1606         m = req.get_method()
1607         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1608                  or code in (301, 302, 303) and m == "POST")):
1609             raise compat_HTTPError(req.full_url, code, msg, headers, fp)
1610         # Strictly (according to RFC 2616), 301 or 302 in response to
1611         # a POST MUST NOT cause a redirection without confirmation
1612         # from the user (of urllib.request, in this case).  In practice,
1613         # essentially all clients do redirect in this case, so we do
1614         # the same.
1615
1616         # Be conciliant with URIs containing a space.  This is mainly
1617         # redundant with the more complete encoding done in http_error_302(),
1618         # but it is kept for compatibility with other callers.
1619         newurl = newurl.replace(' ', '%20')
1620
1621         CONTENT_HEADERS = ("content-length", "content-type")
1622         # NB: don't use dict comprehension for python 2.6 compatibility
1623         newheaders = dict((k, v) for k, v in req.headers.items()
1624                           if k.lower() not in CONTENT_HEADERS)
1625         return compat_urllib_request.Request(
1626             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1627             unverifiable=True)
1628
1629
1630 def extract_timezone(date_str):
1631     m = re.search(
1632         r'''(?x)
1633             ^.{8,}?                                              # >=8 char non-TZ prefix, if present
1634             (?P<tz>Z|                                            # just the UTC Z, or
1635                 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
1636                    (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1637                    [ ]?                                          # optional space
1638                 (?P<sign>\+|-)                                   # +/-
1639                 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
1640             $)
1641         ''', date_str)
1642     if not m:
1643         timezone = datetime.timedelta()
1644     else:
1645         date_str = date_str[:-len(m.group('tz'))]
1646         if not m.group('sign'):
1647             timezone = datetime.timedelta()
1648         else:
1649             sign = 1 if m.group('sign') == '+' else -1
1650             timezone = datetime.timedelta(
1651                 hours=sign * int(m.group('hours')),
1652                 minutes=sign * int(m.group('minutes')))
1653     return timezone, date_str
1654
1655
1656 def parse_iso8601(date_str, delimiter='T', timezone=None):
1657     """ Return a UNIX timestamp from the given date """
1658
1659     if date_str is None:
1660         return None
1661
1662     date_str = re.sub(r'\.[0-9]+', '', date_str)
1663
1664     if timezone is None:
1665         timezone, date_str = extract_timezone(date_str)
1666
1667     try:
1668         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
1669         dt = datetime.datetime.strptime(date_str, date_format) - timezone
1670         return calendar.timegm(dt.timetuple())
1671     except ValueError:
1672         pass
1673
1674
1675 def date_formats(day_first=True):
1676     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1677
1678
1679 def unified_strdate(date_str, day_first=True):
1680     """Return a string with the date in the format YYYYMMDD"""
1681
1682     if date_str is None:
1683         return None
1684     upload_date = None
1685     # Replace commas
1686     date_str = date_str.replace(',', ' ')
1687     # Remove AM/PM + timezone
1688     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1689     _, date_str = extract_timezone(date_str)
1690
1691     for expression in date_formats(day_first):
1692         try:
1693             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1694         except ValueError:
1695             pass
1696     if upload_date is None:
1697         timetuple = email.utils.parsedate_tz(date_str)
1698         if timetuple:
1699             try:
1700                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1701             except ValueError:
1702                 pass
1703     if upload_date is not None:
1704         return compat_str(upload_date)
1705
1706
1707 def unified_timestamp(date_str, day_first=True):
1708     if date_str is None:
1709         return None
1710
1711     date_str = re.sub(r'[,|]', '', date_str)
1712
1713     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1714     timezone, date_str = extract_timezone(date_str)
1715
1716     # Remove AM/PM + timezone
1717     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1718
1719     # Remove unrecognized timezones from ISO 8601 alike timestamps
1720     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1721     if m:
1722         date_str = date_str[:-len(m.group('tz'))]
1723
1724     # Python only supports microseconds, so remove nanoseconds
1725     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1726     if m:
1727         date_str = m.group(1)
1728
1729     for expression in date_formats(day_first):
1730         try:
1731             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1732             return calendar.timegm(dt.timetuple())
1733         except ValueError:
1734             pass
1735     timetuple = email.utils.parsedate_tz(date_str)
1736     if timetuple:
1737         return calendar.timegm(timetuple) + pm_delta * 3600
1738
1739
1740 def determine_ext(url, default_ext='unknown_video'):
1741     if url is None or '.' not in url:
1742         return default_ext
1743     guess = url.partition('?')[0].rpartition('.')[2]
1744     if re.match(r'^[A-Za-z0-9]+$', guess):
1745         return guess
1746     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1747     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1748         return guess.rstrip('/')
1749     else:
1750         return default_ext
1751
1752
1753 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1754     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1755
1756
1757 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1758     """
1759     Return a datetime object from a string in the format YYYYMMDD or
1760     (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
1761
1762     format: string date format used to return datetime object from
1763     precision: round the time portion of a datetime object.
1764                 auto|microsecond|second|minute|hour|day.
1765                 auto: round to the unit provided in date_str (if applicable).
1766     """
1767     auto_precision = False
1768     if precision == 'auto':
1769         auto_precision = True
1770         precision = 'microsecond'
1771     today = datetime_round(datetime.datetime.utcnow(), precision)
1772     if date_str in ('now', 'today'):
1773         return today
1774     if date_str == 'yesterday':
1775         return today - datetime.timedelta(days=1)
1776     match = re.match(
1777         r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
1778         date_str)
1779     if match is not None:
1780         start_time = datetime_from_str(match.group('start'), precision, format)
1781         time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1782         unit = match.group('unit')
1783         if unit == 'month' or unit == 'year':
1784             new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1785             unit = 'day'
1786         else:
1787             if unit == 'week':
1788                 unit = 'day'
1789                 time *= 7
1790             delta = datetime.timedelta(**{unit + 's': time})
1791             new_date = start_time + delta
1792         if auto_precision:
1793             return datetime_round(new_date, unit)
1794         return new_date
1795
1796     return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1797
1798
1799 def date_from_str(date_str, format='%Y%m%d', strict=False):
1800     """
1801     Return a datetime object from a string in the format YYYYMMDD or
1802     (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
1803
1804     If "strict", only (now|today)[+-][0-9](day|week|month|year)(s)? is allowed
1805
1806     format: string date format used to return datetime object from
1807     """
1808     if strict and not re.fullmatch(r'\d{8}|(now|today)[+-]\d+(day|week|month|year)(s)?', date_str):
1809         raise ValueError(f'Invalid date format {date_str}')
1810     return datetime_from_str(date_str, precision='microsecond', format=format).date()
1811
1812
1813 def datetime_add_months(dt, months):
1814     """Increment/Decrement a datetime object by months."""
1815     month = dt.month + months - 1
1816     year = dt.year + month // 12
1817     month = month % 12 + 1
1818     day = min(dt.day, calendar.monthrange(year, month)[1])
1819     return dt.replace(year, month, day)
1820
1821
1822 def datetime_round(dt, precision='day'):
1823     """
1824     Round a datetime object's time to a specific precision
1825     """
1826     if precision == 'microsecond':
1827         return dt
1828
1829     unit_seconds = {
1830         'day': 86400,
1831         'hour': 3600,
1832         'minute': 60,
1833         'second': 1,
1834     }
1835     roundto = lambda x, n: ((x + n / 2) // n) * n
1836     timestamp = calendar.timegm(dt.timetuple())
1837     return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1838
1839
1840 def hyphenate_date(date_str):
1841     """
1842     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1843     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1844     if match is not None:
1845         return '-'.join(match.groups())
1846     else:
1847         return date_str
1848
1849
1850 class DateRange(object):
1851     """Represents a time interval between two dates"""
1852
1853     def __init__(self, start=None, end=None):
1854         """start and end must be strings in the format accepted by date"""
1855         if start is not None:
1856             self.start = date_from_str(start, strict=True)
1857         else:
1858             self.start = datetime.datetime.min.date()
1859         if end is not None:
1860             self.end = date_from_str(end, strict=True)
1861         else:
1862             self.end = datetime.datetime.max.date()
1863         if self.start > self.end:
1864             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
1865
1866     @classmethod
1867     def day(cls, day):
1868         """Returns a range that only contains the given day"""
1869         return cls(day, day)
1870
1871     def __contains__(self, date):
1872         """Check if the date is in the range"""
1873         if not isinstance(date, datetime.date):
1874             date = date_from_str(date)
1875         return self.start <= date <= self.end
1876
1877     def __str__(self):
1878         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
1879
1880
1881 def platform_name():
1882     """ Returns the platform name as a compat_str """
1883     res = platform.platform()
1884     if isinstance(res, bytes):
1885         res = res.decode(preferredencoding())
1886
1887     assert isinstance(res, compat_str)
1888     return res
1889
1890
1891 def get_windows_version():
1892     ''' Get Windows version. None if it's not running on Windows '''
1893     if compat_os_name == 'nt':
1894         return version_tuple(platform.win32_ver()[1])
1895     else:
1896         return None
1897
1898
1899 def write_string(s, out=None, encoding=None):
1900     if out is None:
1901         out = sys.stderr
1902     assert type(s) == compat_str
1903
1904     if 'b' in getattr(out, 'mode', ''):
1905         byt = s.encode(encoding or preferredencoding(), 'ignore')
1906         out.write(byt)
1907     elif hasattr(out, 'buffer'):
1908         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1909         byt = s.encode(enc, 'ignore')
1910         out.buffer.write(byt)
1911     else:
1912         out.write(s)
1913     out.flush()
1914
1915
1916 def bytes_to_intlist(bs):
1917     if not bs:
1918         return []
1919     if isinstance(bs[0], int):  # Python 3
1920         return list(bs)
1921     else:
1922         return [ord(c) for c in bs]
1923
1924
1925 def intlist_to_bytes(xs):
1926     if not xs:
1927         return b''
1928     return compat_struct_pack('%dB' % len(xs), *xs)
1929
1930
1931 class LockingUnsupportedError(IOError):
1932     msg = 'File locking is not supported on this platform'
1933
1934     def __init__(self):
1935         super().__init__(self.msg)
1936
1937
1938 # Cross-platform file locking
1939 if sys.platform == 'win32':
1940     import ctypes.wintypes
1941     import msvcrt
1942
1943     class OVERLAPPED(ctypes.Structure):
1944         _fields_ = [
1945             ('Internal', ctypes.wintypes.LPVOID),
1946             ('InternalHigh', ctypes.wintypes.LPVOID),
1947             ('Offset', ctypes.wintypes.DWORD),
1948             ('OffsetHigh', ctypes.wintypes.DWORD),
1949             ('hEvent', ctypes.wintypes.HANDLE),
1950         ]
1951
1952     kernel32 = ctypes.windll.kernel32
1953     LockFileEx = kernel32.LockFileEx
1954     LockFileEx.argtypes = [
1955         ctypes.wintypes.HANDLE,     # hFile
1956         ctypes.wintypes.DWORD,      # dwFlags
1957         ctypes.wintypes.DWORD,      # dwReserved
1958         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
1959         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
1960         ctypes.POINTER(OVERLAPPED)  # Overlapped
1961     ]
1962     LockFileEx.restype = ctypes.wintypes.BOOL
1963     UnlockFileEx = kernel32.UnlockFileEx
1964     UnlockFileEx.argtypes = [
1965         ctypes.wintypes.HANDLE,     # hFile
1966         ctypes.wintypes.DWORD,      # dwReserved
1967         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
1968         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
1969         ctypes.POINTER(OVERLAPPED)  # Overlapped
1970     ]
1971     UnlockFileEx.restype = ctypes.wintypes.BOOL
1972     whole_low = 0xffffffff
1973     whole_high = 0x7fffffff
1974
1975     def _lock_file(f, exclusive, block):
1976         overlapped = OVERLAPPED()
1977         overlapped.Offset = 0
1978         overlapped.OffsetHigh = 0
1979         overlapped.hEvent = 0
1980         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
1981
1982         if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
1983                           (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
1984                           0, whole_low, whole_high, f._lock_file_overlapped_p):
1985             raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
1986
1987     def _unlock_file(f):
1988         assert f._lock_file_overlapped_p
1989         handle = msvcrt.get_osfhandle(f.fileno())
1990         if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
1991             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1992
1993 else:
1994     try:
1995         import fcntl
1996
1997         def _lock_file(f, exclusive, block):
1998             flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
1999             if not block:
2000                 flags |= fcntl.LOCK_NB
2001             try:
2002                 fcntl.flock(f, flags)
2003             except BlockingIOError:
2004                 raise
2005             except OSError:  # AOSP does not have flock()
2006                 fcntl.lockf(f, flags)
2007
2008         def _unlock_file(f):
2009             try:
2010                 fcntl.flock(f, fcntl.LOCK_UN)
2011             except OSError:
2012                 fcntl.lockf(f, fcntl.LOCK_UN)
2013
2014     except ImportError:
2015
2016         def _lock_file(f, exclusive, block):
2017             raise LockingUnsupportedError()
2018
2019         def _unlock_file(f):
2020             raise LockingUnsupportedError()
2021
2022
2023 class locked_file(object):
2024     locked = False
2025
2026     def __init__(self, filename, mode, block=True, encoding=None):
2027         if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2028             raise NotImplementedError(mode)
2029         self.mode, self.block = mode, block
2030
2031         writable = any(f in mode for f in 'wax+')
2032         readable = any(f in mode for f in 'r+')
2033         flags = functools.reduce(operator.ior, (
2034             getattr(os, 'O_CLOEXEC', 0),  # UNIX only
2035             getattr(os, 'O_BINARY', 0),  # Windows only
2036             getattr(os, 'O_NOINHERIT', 0),  # Windows only
2037             os.O_CREAT if writable else 0,  # O_TRUNC only after locking
2038             os.O_APPEND if 'a' in mode else 0,
2039             os.O_EXCL if 'x' in mode else 0,
2040             os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2041         ))
2042
2043         self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2044
2045     def __enter__(self):
2046         exclusive = 'r' not in self.mode
2047         try:
2048             _lock_file(self.f, exclusive, self.block)
2049             self.locked = True
2050         except IOError:
2051             self.f.close()
2052             raise
2053         if 'w' in self.mode:
2054             self.f.truncate()
2055         return self
2056
2057     def unlock(self):
2058         if not self.locked:
2059             return
2060         try:
2061             _unlock_file(self.f)
2062         finally:
2063             self.locked = False
2064
2065     def __exit__(self, *_):
2066         try:
2067             self.unlock()
2068         finally:
2069             self.f.close()
2070
2071     open = __enter__
2072     close = __exit__
2073
2074     def __getattr__(self, attr):
2075         return getattr(self.f, attr)
2076
2077     def __iter__(self):
2078         return iter(self.f)
2079
2080
2081 def get_filesystem_encoding():
2082     encoding = sys.getfilesystemencoding()
2083     return encoding if encoding is not None else 'utf-8'
2084
2085
2086 def shell_quote(args):
2087     quoted_args = []
2088     encoding = get_filesystem_encoding()
2089     for a in args:
2090         if isinstance(a, bytes):
2091             # We may get a filename encoded with 'encodeFilename'
2092             a = a.decode(encoding)
2093         quoted_args.append(compat_shlex_quote(a))
2094     return ' '.join(quoted_args)
2095
2096
2097 def smuggle_url(url, data):
2098     """ Pass additional data in a URL for internal use. """
2099
2100     url, idata = unsmuggle_url(url, {})
2101     data.update(idata)
2102     sdata = compat_urllib_parse_urlencode(
2103         {'__youtubedl_smuggle': json.dumps(data)})
2104     return url + '#' + sdata
2105
2106
2107 def unsmuggle_url(smug_url, default=None):
2108     if '#__youtubedl_smuggle' not in smug_url:
2109         return smug_url, default
2110     url, _, sdata = smug_url.rpartition('#')
2111     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2112     data = json.loads(jsond)
2113     return url, data
2114
2115
2116 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2117     """ Formats numbers with decimal sufixes like K, M, etc """
2118     num, factor = float_or_none(num), float(factor)
2119     if num is None or num < 0:
2120         return None
2121     POSSIBLE_SUFFIXES = 'kMGTPEZY'
2122     exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2123     suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2124     if factor == 1024:
2125         suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2126     converted = num / (factor ** exponent)
2127     return fmt % (converted, suffix)
2128
2129
2130 def format_bytes(bytes):
2131     return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2132
2133
2134 def lookup_unit_table(unit_table, s):
2135     units_re = '|'.join(re.escape(u) for u in unit_table)
2136     m = re.match(
2137         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2138     if not m:
2139         return None
2140     num_str = m.group('num').replace(',', '.')
2141     mult = unit_table[m.group('unit')]
2142     return int(float(num_str) * mult)
2143
2144
2145 def parse_filesize(s):
2146     if s is None:
2147         return None
2148
2149     # The lower-case forms are of course incorrect and unofficial,
2150     # but we support those too
2151     _UNIT_TABLE = {
2152         'B': 1,
2153         'b': 1,
2154         'bytes': 1,
2155         'KiB': 1024,
2156         'KB': 1000,
2157         'kB': 1024,
2158         'Kb': 1000,
2159         'kb': 1000,
2160         'kilobytes': 1000,
2161         'kibibytes': 1024,
2162         'MiB': 1024 ** 2,
2163         'MB': 1000 ** 2,
2164         'mB': 1024 ** 2,
2165         'Mb': 1000 ** 2,
2166         'mb': 1000 ** 2,
2167         'megabytes': 1000 ** 2,
2168         'mebibytes': 1024 ** 2,
2169         'GiB': 1024 ** 3,
2170         'GB': 1000 ** 3,
2171         'gB': 1024 ** 3,
2172         'Gb': 1000 ** 3,
2173         'gb': 1000 ** 3,
2174         'gigabytes': 1000 ** 3,
2175         'gibibytes': 1024 ** 3,
2176         'TiB': 1024 ** 4,
2177         'TB': 1000 ** 4,
2178         'tB': 1024 ** 4,
2179         'Tb': 1000 ** 4,
2180         'tb': 1000 ** 4,
2181         'terabytes': 1000 ** 4,
2182         'tebibytes': 1024 ** 4,
2183         'PiB': 1024 ** 5,
2184         'PB': 1000 ** 5,
2185         'pB': 1024 ** 5,
2186         'Pb': 1000 ** 5,
2187         'pb': 1000 ** 5,
2188         'petabytes': 1000 ** 5,
2189         'pebibytes': 1024 ** 5,
2190         'EiB': 1024 ** 6,
2191         'EB': 1000 ** 6,
2192         'eB': 1024 ** 6,
2193         'Eb': 1000 ** 6,
2194         'eb': 1000 ** 6,
2195         'exabytes': 1000 ** 6,
2196         'exbibytes': 1024 ** 6,
2197         'ZiB': 1024 ** 7,
2198         'ZB': 1000 ** 7,
2199         'zB': 1024 ** 7,
2200         'Zb': 1000 ** 7,
2201         'zb': 1000 ** 7,
2202         'zettabytes': 1000 ** 7,
2203         'zebibytes': 1024 ** 7,
2204         'YiB': 1024 ** 8,
2205         'YB': 1000 ** 8,
2206         'yB': 1024 ** 8,
2207         'Yb': 1000 ** 8,
2208         'yb': 1000 ** 8,
2209         'yottabytes': 1000 ** 8,
2210         'yobibytes': 1024 ** 8,
2211     }
2212
2213     return lookup_unit_table(_UNIT_TABLE, s)
2214
2215
2216 def parse_count(s):
2217     if s is None:
2218         return None
2219
2220     s = re.sub(r'^[^\d]+\s', '', s).strip()
2221
2222     if re.match(r'^[\d,.]+$', s):
2223         return str_to_int(s)
2224
2225     _UNIT_TABLE = {
2226         'k': 1000,
2227         'K': 1000,
2228         'm': 1000 ** 2,
2229         'M': 1000 ** 2,
2230         'kk': 1000 ** 2,
2231         'KK': 1000 ** 2,
2232         'b': 1000 ** 3,
2233         'B': 1000 ** 3,
2234     }
2235
2236     ret = lookup_unit_table(_UNIT_TABLE, s)
2237     if ret is not None:
2238         return ret
2239
2240     mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2241     if mobj:
2242         return str_to_int(mobj.group(1))
2243
2244
2245 def parse_resolution(s, *, lenient=False):
2246     if s is None:
2247         return {}
2248
2249     if lenient:
2250         mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2251     else:
2252         mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2253     if mobj:
2254         return {
2255             'width': int(mobj.group('w')),
2256             'height': int(mobj.group('h')),
2257         }
2258
2259     mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2260     if mobj:
2261         return {'height': int(mobj.group(1))}
2262
2263     mobj = re.search(r'\b([48])[kK]\b', s)
2264     if mobj:
2265         return {'height': int(mobj.group(1)) * 540}
2266
2267     return {}
2268
2269
2270 def parse_bitrate(s):
2271     if not isinstance(s, compat_str):
2272         return
2273     mobj = re.search(r'\b(\d+)\s*kbps', s)
2274     if mobj:
2275         return int(mobj.group(1))
2276
2277
2278 def month_by_name(name, lang='en'):
2279     """ Return the number of a month by (locale-independently) English name """
2280
2281     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2282
2283     try:
2284         return month_names.index(name) + 1
2285     except ValueError:
2286         return None
2287
2288
2289 def month_by_abbreviation(abbrev):
2290     """ Return the number of a month by (locale-independently) English
2291         abbreviations """
2292
2293     try:
2294         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2295     except ValueError:
2296         return None
2297
2298
2299 def fix_xml_ampersands(xml_str):
2300     """Replace all the '&' by '&amp;' in XML"""
2301     return re.sub(
2302         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2303         '&amp;',
2304         xml_str)
2305
2306
2307 def setproctitle(title):
2308     assert isinstance(title, compat_str)
2309
2310     # ctypes in Jython is not complete
2311     # http://bugs.jython.org/issue2148
2312     if sys.platform.startswith('java'):
2313         return
2314
2315     try:
2316         libc = ctypes.cdll.LoadLibrary('libc.so.6')
2317     except OSError:
2318         return
2319     except TypeError:
2320         # LoadLibrary in Windows Python 2.7.13 only expects
2321         # a bytestring, but since unicode_literals turns
2322         # every string into a unicode string, it fails.
2323         return
2324     title_bytes = title.encode('utf-8')
2325     buf = ctypes.create_string_buffer(len(title_bytes))
2326     buf.value = title_bytes
2327     try:
2328         libc.prctl(15, buf, 0, 0, 0)
2329     except AttributeError:
2330         return  # Strange libc, just skip this
2331
2332
2333 def remove_start(s, start):
2334     return s[len(start):] if s is not None and s.startswith(start) else s
2335
2336
2337 def remove_end(s, end):
2338     return s[:-len(end)] if s is not None and s.endswith(end) else s
2339
2340
2341 def remove_quotes(s):
2342     if s is None or len(s) < 2:
2343         return s
2344     for quote in ('"', "'", ):
2345         if s[0] == quote and s[-1] == quote:
2346             return s[1:-1]
2347     return s
2348
2349
2350 def get_domain(url):
2351     domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2352     return domain.group('domain') if domain else None
2353
2354
2355 def url_basename(url):
2356     path = compat_urlparse.urlparse(url).path
2357     return path.strip('/').split('/')[-1]
2358
2359
2360 def base_url(url):
2361     return re.match(r'https?://[^?#&]+/', url).group()
2362
2363
2364 def urljoin(base, path):
2365     if isinstance(path, bytes):
2366         path = path.decode('utf-8')
2367     if not isinstance(path, compat_str) or not path:
2368         return None
2369     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2370         return path
2371     if isinstance(base, bytes):
2372         base = base.decode('utf-8')
2373     if not isinstance(base, compat_str) or not re.match(
2374             r'^(?:https?:)?//', base):
2375         return None
2376     return compat_urlparse.urljoin(base, path)
2377
2378
2379 class HEADRequest(compat_urllib_request.Request):
2380     def get_method(self):
2381         return 'HEAD'
2382
2383
2384 class PUTRequest(compat_urllib_request.Request):
2385     def get_method(self):
2386         return 'PUT'
2387
2388
2389 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2390     if get_attr and v is not None:
2391         v = getattr(v, get_attr, None)
2392     try:
2393         return int(v) * invscale // scale
2394     except (ValueError, TypeError, OverflowError):
2395         return default
2396
2397
2398 def str_or_none(v, default=None):
2399     return default if v is None else compat_str(v)
2400
2401
2402 def str_to_int(int_str):
2403     """ A more relaxed version of int_or_none """
2404     if isinstance(int_str, compat_integer_types):
2405         return int_str
2406     elif isinstance(int_str, compat_str):
2407         int_str = re.sub(r'[,\.\+]', '', int_str)
2408         return int_or_none(int_str)
2409
2410
2411 def float_or_none(v, scale=1, invscale=1, default=None):
2412     if v is None:
2413         return default
2414     try:
2415         return float(v) * invscale / scale
2416     except (ValueError, TypeError):
2417         return default
2418
2419
2420 def bool_or_none(v, default=None):
2421     return v if isinstance(v, bool) else default
2422
2423
2424 def strip_or_none(v, default=None):
2425     return v.strip() if isinstance(v, compat_str) else default
2426
2427
2428 def url_or_none(url):
2429     if not url or not isinstance(url, compat_str):
2430         return None
2431     url = url.strip()
2432     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2433
2434
2435 def request_to_url(req):
2436     if isinstance(req, compat_urllib_request.Request):
2437         return req.get_full_url()
2438     else:
2439         return req
2440
2441
2442 def strftime_or_none(timestamp, date_format, default=None):
2443     datetime_object = None
2444     try:
2445         if isinstance(timestamp, compat_numeric_types):  # unix timestamp
2446             datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2447         elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
2448             datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2449         return datetime_object.strftime(date_format)
2450     except (ValueError, TypeError, AttributeError):
2451         return default
2452
2453
2454 def parse_duration(s):
2455     if not isinstance(s, compat_basestring):
2456         return None
2457     s = s.strip()
2458     if not s:
2459         return None
2460
2461     days, hours, mins, secs, ms = [None] * 5
2462     m = re.match(r'''(?x)
2463             (?P<before_secs>
2464                 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2465             (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2466             (?P<ms>[.:][0-9]+)?Z?$
2467         ''', s)
2468     if m:
2469         days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2470     else:
2471         m = re.match(
2472             r'''(?ix)(?:P?
2473                 (?:
2474                     [0-9]+\s*y(?:ears?)?,?\s*
2475                 )?
2476                 (?:
2477                     [0-9]+\s*m(?:onths?)?,?\s*
2478                 )?
2479                 (?:
2480                     [0-9]+\s*w(?:eeks?)?,?\s*
2481                 )?
2482                 (?:
2483                     (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2484                 )?
2485                 T)?
2486                 (?:
2487                     (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2488                 )?
2489                 (?:
2490                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2491                 )?
2492                 (?:
2493                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2494                 )?Z?$''', s)
2495         if m:
2496             days, hours, mins, secs, ms = m.groups()
2497         else:
2498             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2499             if m:
2500                 hours, mins = m.groups()
2501             else:
2502                 return None
2503
2504     duration = 0
2505     if secs:
2506         duration += float(secs)
2507     if mins:
2508         duration += float(mins) * 60
2509     if hours:
2510         duration += float(hours) * 60 * 60
2511     if days:
2512         duration += float(days) * 24 * 60 * 60
2513     if ms:
2514         duration += float(ms.replace(':', '.'))
2515     return duration
2516
2517
2518 def prepend_extension(filename, ext, expected_real_ext=None):
2519     name, real_ext = os.path.splitext(filename)
2520     return (
2521         '{0}.{1}{2}'.format(name, ext, real_ext)
2522         if not expected_real_ext or real_ext[1:] == expected_real_ext
2523         else '{0}.{1}'.format(filename, ext))
2524
2525
2526 def replace_extension(filename, ext, expected_real_ext=None):
2527     name, real_ext = os.path.splitext(filename)
2528     return '{0}.{1}'.format(
2529         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2530         ext)
2531
2532
2533 def check_executable(exe, args=[]):
2534     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2535     args can be a list of arguments for a short output (like -version) """
2536     try:
2537         Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
2538     except OSError:
2539         return False
2540     return exe
2541
2542
2543 def _get_exe_version_output(exe, args, *, to_screen=None):
2544     if to_screen:
2545         to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2546     try:
2547         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2548         # SIGTTOU if yt-dlp is run in the background.
2549         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2550         out, _ = Popen(
2551             [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
2552             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
2553     except OSError:
2554         return False
2555     if isinstance(out, bytes):  # Python 2.x
2556         out = out.decode('ascii', 'ignore')
2557     return out
2558
2559
2560 def detect_exe_version(output, version_re=None, unrecognized='present'):
2561     assert isinstance(output, compat_str)
2562     if version_re is None:
2563         version_re = r'version\s+([-0-9._a-zA-Z]+)'
2564     m = re.search(version_re, output)
2565     if m:
2566         return m.group(1)
2567     else:
2568         return unrecognized
2569
2570
2571 def get_exe_version(exe, args=['--version'],
2572                     version_re=None, unrecognized='present'):
2573     """ Returns the version of the specified executable,
2574     or False if the executable is not present """
2575     out = _get_exe_version_output(exe, args)
2576     return detect_exe_version(out, version_re, unrecognized) if out else False
2577
2578
2579 class LazyList(collections.abc.Sequence):
2580     ''' Lazy immutable list from an iterable
2581     Note that slices of a LazyList are lists and not LazyList'''
2582
2583     class IndexError(IndexError):
2584         pass
2585
2586     def __init__(self, iterable, *, reverse=False, _cache=None):
2587         self.__iterable = iter(iterable)
2588         self.__cache = [] if _cache is None else _cache
2589         self.__reversed = reverse
2590
2591     def __iter__(self):
2592         if self.__reversed:
2593             # We need to consume the entire iterable to iterate in reverse
2594             yield from self.exhaust()
2595             return
2596         yield from self.__cache
2597         for item in self.__iterable:
2598             self.__cache.append(item)
2599             yield item
2600
2601     def __exhaust(self):
2602         self.__cache.extend(self.__iterable)
2603         # Discard the emptied iterable to make it pickle-able
2604         self.__iterable = []
2605         return self.__cache
2606
2607     def exhaust(self):
2608         ''' Evaluate the entire iterable '''
2609         return self.__exhaust()[::-1 if self.__reversed else 1]
2610
2611     @staticmethod
2612     def __reverse_index(x):
2613         return None if x is None else -(x + 1)
2614
2615     def __getitem__(self, idx):
2616         if isinstance(idx, slice):
2617             if self.__reversed:
2618                 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
2619             start, stop, step = idx.start, idx.stop, idx.step or 1
2620         elif isinstance(idx, int):
2621             if self.__reversed:
2622                 idx = self.__reverse_index(idx)
2623             start, stop, step = idx, idx, 0
2624         else:
2625             raise TypeError('indices must be integers or slices')
2626         if ((start or 0) < 0 or (stop or 0) < 0
2627                 or (start is None and step < 0)
2628                 or (stop is None and step > 0)):
2629             # We need to consume the entire iterable to be able to slice from the end
2630             # Obviously, never use this with infinite iterables
2631             self.__exhaust()
2632             try:
2633                 return self.__cache[idx]
2634             except IndexError as e:
2635                 raise self.IndexError(e) from e
2636         n = max(start or 0, stop or 0) - len(self.__cache) + 1
2637         if n > 0:
2638             self.__cache.extend(itertools.islice(self.__iterable, n))
2639         try:
2640             return self.__cache[idx]
2641         except IndexError as e:
2642             raise self.IndexError(e) from e
2643
2644     def __bool__(self):
2645         try:
2646             self[-1] if self.__reversed else self[0]
2647         except self.IndexError:
2648             return False
2649         return True
2650
2651     def __len__(self):
2652         self.__exhaust()
2653         return len(self.__cache)
2654
2655     def __reversed__(self):
2656         return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
2657
2658     def __copy__(self):
2659         return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
2660
2661     def __repr__(self):
2662         # repr and str should mimic a list. So we exhaust the iterable
2663         return repr(self.exhaust())
2664
2665     def __str__(self):
2666         return repr(self.exhaust())
2667
2668
2669 class PagedList:
2670
2671     class IndexError(IndexError):
2672         pass
2673
2674     def __len__(self):
2675         # This is only useful for tests
2676         return len(self.getslice())
2677
2678     def __init__(self, pagefunc, pagesize, use_cache=True):
2679         self._pagefunc = pagefunc
2680         self._pagesize = pagesize
2681         self._pagecount = float('inf')
2682         self._use_cache = use_cache
2683         self._cache = {}
2684
2685     def getpage(self, pagenum):
2686         page_results = self._cache.get(pagenum)
2687         if page_results is None:
2688             page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2689         if self._use_cache:
2690             self._cache[pagenum] = page_results
2691         return page_results
2692
2693     def getslice(self, start=0, end=None):
2694         return list(self._getslice(start, end))
2695
2696     def _getslice(self, start, end):
2697         raise NotImplementedError('This method must be implemented by subclasses')
2698
2699     def __getitem__(self, idx):
2700         assert self._use_cache, 'Indexing PagedList requires cache'
2701         if not isinstance(idx, int) or idx < 0:
2702             raise TypeError('indices must be non-negative integers')
2703         entries = self.getslice(idx, idx + 1)
2704         if not entries:
2705             raise self.IndexError()
2706         return entries[0]
2707
2708
2709 class OnDemandPagedList(PagedList):
2710     """Download pages until a page with less than maximum results"""
2711     def _getslice(self, start, end):
2712         for pagenum in itertools.count(start // self._pagesize):
2713             firstid = pagenum * self._pagesize
2714             nextfirstid = pagenum * self._pagesize + self._pagesize
2715             if start >= nextfirstid:
2716                 continue
2717
2718             startv = (
2719                 start % self._pagesize
2720                 if firstid <= start < nextfirstid
2721                 else 0)
2722             endv = (
2723                 ((end - 1) % self._pagesize) + 1
2724                 if (end is not None and firstid <= end <= nextfirstid)
2725                 else None)
2726
2727             try:
2728                 page_results = self.getpage(pagenum)
2729             except Exception:
2730                 self._pagecount = pagenum - 1
2731                 raise
2732             if startv != 0 or endv is not None:
2733                 page_results = page_results[startv:endv]
2734             yield from page_results
2735
2736             # A little optimization - if current page is not "full", ie. does
2737             # not contain page_size videos then we can assume that this page
2738             # is the last one - there are no more ids on further pages -
2739             # i.e. no need to query again.
2740             if len(page_results) + startv < self._pagesize:
2741                 break
2742
2743             # If we got the whole page, but the next page is not interesting,
2744             # break out early as well
2745             if end == nextfirstid:
2746                 break
2747
2748
2749 class InAdvancePagedList(PagedList):
2750     """PagedList with total number of pages known in advance"""
2751     def __init__(self, pagefunc, pagecount, pagesize):
2752         PagedList.__init__(self, pagefunc, pagesize, True)
2753         self._pagecount = pagecount
2754
2755     def _getslice(self, start, end):
2756         start_page = start // self._pagesize
2757         end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2758         skip_elems = start - start_page * self._pagesize
2759         only_more = None if end is None else end - start
2760         for pagenum in range(start_page, end_page):
2761             page_results = self.getpage(pagenum)
2762             if skip_elems:
2763                 page_results = page_results[skip_elems:]
2764                 skip_elems = None
2765             if only_more is not None:
2766                 if len(page_results) < only_more:
2767                     only_more -= len(page_results)
2768                 else:
2769                     yield from page_results[:only_more]
2770                     break
2771             yield from page_results
2772
2773
2774 def uppercase_escape(s):
2775     unicode_escape = codecs.getdecoder('unicode_escape')
2776     return re.sub(
2777         r'\\U[0-9a-fA-F]{8}',
2778         lambda m: unicode_escape(m.group(0))[0],
2779         s)
2780
2781
2782 def lowercase_escape(s):
2783     unicode_escape = codecs.getdecoder('unicode_escape')
2784     return re.sub(
2785         r'\\u[0-9a-fA-F]{4}',
2786         lambda m: unicode_escape(m.group(0))[0],
2787         s)
2788
2789
2790 def escape_rfc3986(s):
2791     """Escape non-ASCII characters as suggested by RFC 3986"""
2792     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2793
2794
2795 def escape_url(url):
2796     """Escape URL as suggested by RFC 3986"""
2797     url_parsed = compat_urllib_parse_urlparse(url)
2798     return url_parsed._replace(
2799         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2800         path=escape_rfc3986(url_parsed.path),
2801         params=escape_rfc3986(url_parsed.params),
2802         query=escape_rfc3986(url_parsed.query),
2803         fragment=escape_rfc3986(url_parsed.fragment)
2804     ).geturl()
2805
2806
2807 def parse_qs(url):
2808     return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2809
2810
2811 def read_batch_urls(batch_fd):
2812     def fixup(url):
2813         if not isinstance(url, compat_str):
2814             url = url.decode('utf-8', 'replace')
2815         BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2816         for bom in BOM_UTF8:
2817             if url.startswith(bom):
2818                 url = url[len(bom):]
2819         url = url.lstrip()
2820         if not url or url.startswith(('#', ';', ']')):
2821             return False
2822         # "#" cannot be stripped out since it is part of the URI
2823         # However, it can be safely stipped out if follwing a whitespace
2824         return re.split(r'\s#', url, 1)[0].rstrip()
2825
2826     with contextlib.closing(batch_fd) as fd:
2827         return [url for url in map(fixup, fd) if url]
2828
2829
2830 def urlencode_postdata(*args, **kargs):
2831     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
2832
2833
2834 def update_url_query(url, query):
2835     if not query:
2836         return url
2837     parsed_url = compat_urlparse.urlparse(url)
2838     qs = compat_parse_qs(parsed_url.query)
2839     qs.update(query)
2840     return compat_urlparse.urlunparse(parsed_url._replace(
2841         query=compat_urllib_parse_urlencode(qs, True)))
2842
2843
2844 def update_Request(req, url=None, data=None, headers={}, query={}):
2845     req_headers = req.headers.copy()
2846     req_headers.update(headers)
2847     req_data = data or req.data
2848     req_url = update_url_query(url or req.get_full_url(), query)
2849     req_get_method = req.get_method()
2850     if req_get_method == 'HEAD':
2851         req_type = HEADRequest
2852     elif req_get_method == 'PUT':
2853         req_type = PUTRequest
2854     else:
2855         req_type = compat_urllib_request.Request
2856     new_req = req_type(
2857         req_url, data=req_data, headers=req_headers,
2858         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2859     if hasattr(req, 'timeout'):
2860         new_req.timeout = req.timeout
2861     return new_req
2862
2863
2864 def _multipart_encode_impl(data, boundary):
2865     content_type = 'multipart/form-data; boundary=%s' % boundary
2866
2867     out = b''
2868     for k, v in data.items():
2869         out += b'--' + boundary.encode('ascii') + b'\r\n'
2870         if isinstance(k, compat_str):
2871             k = k.encode('utf-8')
2872         if isinstance(v, compat_str):
2873             v = v.encode('utf-8')
2874         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2875         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2876         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2877         if boundary.encode('ascii') in content:
2878             raise ValueError('Boundary overlaps with data')
2879         out += content
2880
2881     out += b'--' + boundary.encode('ascii') + b'--\r\n'
2882
2883     return out, content_type
2884
2885
2886 def multipart_encode(data, boundary=None):
2887     '''
2888     Encode a dict to RFC 7578-compliant form-data
2889
2890     data:
2891         A dict where keys and values can be either Unicode or bytes-like
2892         objects.
2893     boundary:
2894         If specified a Unicode object, it's used as the boundary. Otherwise
2895         a random boundary is generated.
2896
2897     Reference: https://tools.ietf.org/html/rfc7578
2898     '''
2899     has_specified_boundary = boundary is not None
2900
2901     while True:
2902         if boundary is None:
2903             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
2904
2905         try:
2906             out, content_type = _multipart_encode_impl(data, boundary)
2907             break
2908         except ValueError:
2909             if has_specified_boundary:
2910                 raise
2911             boundary = None
2912
2913     return out, content_type
2914
2915
2916 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
2917     for val in map(d.get, variadic(key_or_keys)):
2918         if val is not None and (val or not skip_false_values):
2919             return val
2920     return default
2921
2922
2923 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
2924     for f in funcs:
2925         try:
2926             val = f(*args, **kwargs)
2927         except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
2928             pass
2929         else:
2930             if expected_type is None or isinstance(val, expected_type):
2931                 return val
2932
2933
2934 def try_get(src, getter, expected_type=None):
2935     return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
2936
2937
2938 def filter_dict(dct, cndn=lambda _, v: v is not None):
2939     return {k: v for k, v in dct.items() if cndn(k, v)}
2940
2941
2942 def merge_dicts(*dicts):
2943     merged = {}
2944     for a_dict in dicts:
2945         for k, v in a_dict.items():
2946             if (v is not None and k not in merged
2947                     or isinstance(v, str) and merged[k] == ''):
2948                 merged[k] = v
2949     return merged
2950
2951
2952 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2953     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2954
2955
2956 US_RATINGS = {
2957     'G': 0,
2958     'PG': 10,
2959     'PG-13': 13,
2960     'R': 16,
2961     'NC': 18,
2962 }
2963
2964
2965 TV_PARENTAL_GUIDELINES = {
2966     'TV-Y': 0,
2967     'TV-Y7': 7,
2968     'TV-G': 0,
2969     'TV-PG': 0,
2970     'TV-14': 14,
2971     'TV-MA': 17,
2972 }
2973
2974
2975 def parse_age_limit(s):
2976     if type(s) == int:
2977         return s if 0 <= s <= 21 else None
2978     if not isinstance(s, compat_basestring):
2979         return None
2980     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
2981     if m:
2982         return int(m.group('age'))
2983     s = s.upper()
2984     if s in US_RATINGS:
2985         return US_RATINGS[s]
2986     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
2987     if m:
2988         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
2989     return None
2990
2991
2992 def strip_jsonp(code):
2993     return re.sub(
2994         r'''(?sx)^
2995             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2996             (?:\s*&&\s*(?P=func_name))?
2997             \s*\(\s*(?P<callback_data>.*)\);?
2998             \s*?(?://[^\n]*)*$''',
2999         r'\g<callback_data>', code)
3000
3001
3002 def js_to_json(code, vars={}):
3003     # vars is a dict of var, val pairs to substitute
3004     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3005     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
3006     INTEGER_TABLE = (
3007         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
3008         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
3009     )
3010
3011     def fix_kv(m):
3012         v = m.group(0)
3013         if v in ('true', 'false', 'null'):
3014             return v
3015         elif v in ('undefined', 'void 0'):
3016             return 'null'
3017         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3018             return ""
3019
3020         if v[0] in ("'", '"'):
3021             v = re.sub(r'(?s)\\.|"', lambda m: {
3022                 '"': '\\"',
3023                 "\\'": "'",
3024                 '\\\n': '',
3025                 '\\x': '\\u00',
3026             }.get(m.group(0), m.group(0)), v[1:-1])
3027         else:
3028             for regex, base in INTEGER_TABLE:
3029                 im = re.match(regex, v)
3030                 if im:
3031                     i = int(im.group(1), base)
3032                     return '"%d":' % i if v.endswith(':') else '%d' % i
3033
3034             if v in vars:
3035                 return vars[v]
3036
3037         return '"%s"' % v
3038
3039     code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3040
3041     return re.sub(r'''(?sx)
3042         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3043         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3044         {comment}|,(?={skip}[\]}}])|
3045         void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3046         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3047         [0-9]+(?={skip}:)|
3048         !+
3049         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3050
3051
3052 def qualities(quality_ids):
3053     """ Get a numeric quality value out of a list of possible values """
3054     def q(qid):
3055         try:
3056             return quality_ids.index(qid)
3057         except ValueError:
3058             return -1
3059     return q
3060
3061
3062 POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
3063
3064
3065 DEFAULT_OUTTMPL = {
3066     'default': '%(title)s [%(id)s].%(ext)s',
3067     'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3068 }
3069 OUTTMPL_TYPES = {
3070     'chapter': None,
3071     'subtitle': None,
3072     'thumbnail': None,
3073     'description': 'description',
3074     'annotation': 'annotations.xml',
3075     'infojson': 'info.json',
3076     'link': None,
3077     'pl_video': None,
3078     'pl_thumbnail': None,
3079     'pl_description': 'description',
3080     'pl_infojson': 'info.json',
3081 }
3082
3083 # As of [1] format syntax is:
3084 #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3085 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3086 STR_FORMAT_RE_TMPL = r'''(?x)
3087     (?<!%)(?P<prefix>(?:%%)*)
3088     %
3089     (?P<has_key>\((?P<key>{0})\))?
3090     (?P<format>
3091         (?P<conversion>[#0\-+ ]+)?
3092         (?P<min_width>\d+)?
3093         (?P<precision>\.\d+)?
3094         (?P<len_mod>[hlL])?  # unused in python
3095         {1}  # conversion type
3096     )
3097 '''
3098
3099
3100 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3101
3102
3103 def limit_length(s, length):
3104     """ Add ellipses to overly long strings """
3105     if s is None:
3106         return None
3107     ELLIPSES = '...'
3108     if len(s) > length:
3109         return s[:length - len(ELLIPSES)] + ELLIPSES
3110     return s
3111
3112
3113 def version_tuple(v):
3114     return tuple(int(e) for e in re.split(r'[-.]', v))
3115
3116
3117 def is_outdated_version(version, limit, assume_new=True):
3118     if not version:
3119         return not assume_new
3120     try:
3121         return version_tuple(version) < version_tuple(limit)
3122     except ValueError:
3123         return not assume_new
3124
3125
3126 def ytdl_is_updateable():
3127     """ Returns if yt-dlp can be updated with -U """
3128
3129     from .update import is_non_updateable
3130
3131     return not is_non_updateable()
3132
3133
3134 def args_to_str(args):
3135     # Get a short string representation for a subprocess command
3136     return ' '.join(compat_shlex_quote(a) for a in args)
3137
3138
3139 def error_to_compat_str(err):
3140     return str(err)
3141
3142
3143 def error_to_str(err):
3144     return f'{type(err).__name__}: {err}'
3145
3146
3147 def mimetype2ext(mt):
3148     if mt is None:
3149         return None
3150
3151     mt, _, params = mt.partition(';')
3152     mt = mt.strip()
3153
3154     FULL_MAP = {
3155         'audio/mp4': 'm4a',
3156         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
3157         # it's the most popular one
3158         'audio/mpeg': 'mp3',
3159         'audio/x-wav': 'wav',
3160         'audio/wav': 'wav',
3161         'audio/wave': 'wav',
3162     }
3163
3164     ext = FULL_MAP.get(mt)
3165     if ext is not None:
3166         return ext
3167
3168     SUBTYPE_MAP = {
3169         '3gpp': '3gp',
3170         'smptett+xml': 'tt',
3171         'ttaf+xml': 'dfxp',
3172         'ttml+xml': 'ttml',
3173         'x-flv': 'flv',
3174         'x-mp4-fragmented': 'mp4',
3175         'x-ms-sami': 'sami',
3176         'x-ms-wmv': 'wmv',
3177         'mpegurl': 'm3u8',
3178         'x-mpegurl': 'm3u8',
3179         'vnd.apple.mpegurl': 'm3u8',
3180         'dash+xml': 'mpd',
3181         'f4m+xml': 'f4m',
3182         'hds+xml': 'f4m',
3183         'vnd.ms-sstr+xml': 'ism',
3184         'quicktime': 'mov',
3185         'mp2t': 'ts',
3186         'x-wav': 'wav',
3187         'filmstrip+json': 'fs',
3188         'svg+xml': 'svg',
3189     }
3190
3191     _, _, subtype = mt.rpartition('/')
3192     ext = SUBTYPE_MAP.get(subtype.lower())
3193     if ext is not None:
3194         return ext
3195
3196     SUFFIX_MAP = {
3197         'json': 'json',
3198         'xml': 'xml',
3199         'zip': 'zip',
3200         'gzip': 'gz',
3201     }
3202
3203     _, _, suffix = subtype.partition('+')
3204     ext = SUFFIX_MAP.get(suffix)
3205     if ext is not None:
3206         return ext
3207
3208     return subtype.replace('+', '.')
3209
3210
3211 def ext2mimetype(ext_or_url):
3212     if not ext_or_url:
3213         return None
3214     if '.' not in ext_or_url:
3215         ext_or_url = f'file.{ext_or_url}'
3216     return mimetypes.guess_type(ext_or_url)[0]
3217
3218
3219 def parse_codecs(codecs_str):
3220     # http://tools.ietf.org/html/rfc6381
3221     if not codecs_str:
3222         return {}
3223     split_codecs = list(filter(None, map(
3224         str.strip, codecs_str.strip().strip(',').split(','))))
3225     vcodec, acodec, tcodec, hdr = None, None, None, None
3226     for full_codec in split_codecs:
3227         parts = full_codec.split('.')
3228         codec = parts[0].replace('0', '')
3229         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3230                      'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3231             if not vcodec:
3232                 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3233                 if codec in ('dvh1', 'dvhe'):
3234                     hdr = 'DV'
3235                 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3236                     hdr = 'HDR10'
3237                 elif full_codec.replace('0', '').startswith('vp9.2'):
3238                     hdr = 'HDR10'
3239         elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3240             if not acodec:
3241                 acodec = full_codec
3242         elif codec in ('stpp', 'wvtt',):
3243             if not tcodec:
3244                 tcodec = full_codec
3245         else:
3246             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
3247     if vcodec or acodec or tcodec:
3248         return {
3249             'vcodec': vcodec or 'none',
3250             'acodec': acodec or 'none',
3251             'dynamic_range': hdr,
3252             **({'tcodec': tcodec} if tcodec is not None else {}),
3253         }
3254     elif len(split_codecs) == 2:
3255         return {
3256             'vcodec': split_codecs[0],
3257             'acodec': split_codecs[1],
3258         }
3259     return {}
3260
3261
3262 def urlhandle_detect_ext(url_handle):
3263     getheader = url_handle.headers.get
3264
3265     cd = getheader('Content-Disposition')
3266     if cd:
3267         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3268         if m:
3269             e = determine_ext(m.group('filename'), default_ext=None)
3270             if e:
3271                 return e
3272
3273     return mimetype2ext(getheader('Content-Type'))
3274
3275
3276 def encode_data_uri(data, mime_type):
3277     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3278
3279
3280 def age_restricted(content_limit, age_limit):
3281     """ Returns True iff the content should be blocked """
3282
3283     if age_limit is None:  # No limit set
3284         return False
3285     if content_limit is None:
3286         return False  # Content available for everyone
3287     return age_limit < content_limit
3288
3289
3290 def is_html(first_bytes):
3291     """ Detect whether a file contains HTML by examining its first bytes. """
3292
3293     BOMS = [
3294         (b'\xef\xbb\xbf', 'utf-8'),
3295         (b'\x00\x00\xfe\xff', 'utf-32-be'),
3296         (b'\xff\xfe\x00\x00', 'utf-32-le'),
3297         (b'\xff\xfe', 'utf-16-le'),
3298         (b'\xfe\xff', 'utf-16-be'),
3299     ]
3300     for bom, enc in BOMS:
3301         if first_bytes.startswith(bom):
3302             s = first_bytes[len(bom):].decode(enc, 'replace')
3303             break
3304     else:
3305         s = first_bytes.decode('utf-8', 'replace')
3306
3307     return re.match(r'^\s*<', s)
3308
3309
3310 def determine_protocol(info_dict):
3311     protocol = info_dict.get('protocol')
3312     if protocol is not None:
3313         return protocol
3314
3315     url = sanitize_url(info_dict['url'])
3316     if url.startswith('rtmp'):
3317         return 'rtmp'
3318     elif url.startswith('mms'):
3319         return 'mms'
3320     elif url.startswith('rtsp'):
3321         return 'rtsp'
3322
3323     ext = determine_ext(url)
3324     if ext == 'm3u8':
3325         return 'm3u8'
3326     elif ext == 'f4m':
3327         return 'f4m'
3328
3329     return compat_urllib_parse_urlparse(url).scheme
3330
3331
3332 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3333     """ Render a list of rows, each as a list of values.
3334     Text after a \t will be right aligned """
3335     def width(string):
3336         return len(remove_terminal_sequences(string).replace('\t', ''))
3337
3338     def get_max_lens(table):
3339         return [max(width(str(v)) for v in col) for col in zip(*table)]
3340
3341     def filter_using_list(row, filterArray):
3342         return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3343
3344     max_lens = get_max_lens(data) if hide_empty else []
3345     header_row = filter_using_list(header_row, max_lens)
3346     data = [filter_using_list(row, max_lens) for row in data]
3347
3348     table = [header_row] + data
3349     max_lens = get_max_lens(table)
3350     extra_gap += 1
3351     if delim:
3352         table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3353         table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
3354     for row in table:
3355         for pos, text in enumerate(map(str, row)):
3356             if '\t' in text:
3357                 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3358             else:
3359                 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3360     ret = '\n'.join(''.join(row).rstrip() for row in table)
3361     return ret
3362
3363
3364 def _match_one(filter_part, dct, incomplete):
3365     # TODO: Generalize code with YoutubeDL._build_format_filter
3366     STRING_OPERATORS = {
3367         '*=': operator.contains,
3368         '^=': lambda attr, value: attr.startswith(value),
3369         '$=': lambda attr, value: attr.endswith(value),
3370         '~=': lambda attr, value: re.search(value, attr),
3371     }
3372     COMPARISON_OPERATORS = {
3373         **STRING_OPERATORS,
3374         '<=': operator.le,  # "<=" must be defined above "<"
3375         '<': operator.lt,
3376         '>=': operator.ge,
3377         '>': operator.gt,
3378         '=': operator.eq,
3379     }
3380
3381     if isinstance(incomplete, bool):
3382         is_incomplete = lambda _: incomplete
3383     else:
3384         is_incomplete = lambda k: k in incomplete
3385
3386     operator_rex = re.compile(r'''(?x)\s*
3387         (?P<key>[a-z_]+)
3388         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3389         (?:
3390             (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3391             (?P<strval>.+?)
3392         )
3393         \s*$
3394         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3395     m = operator_rex.search(filter_part)
3396     if m:
3397         m = m.groupdict()
3398         unnegated_op = COMPARISON_OPERATORS[m['op']]
3399         if m['negation']:
3400             op = lambda attr, value: not unnegated_op(attr, value)
3401         else:
3402             op = unnegated_op
3403         comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3404         if m['quote']:
3405             comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3406         actual_value = dct.get(m['key'])
3407         numeric_comparison = None
3408         if isinstance(actual_value, compat_numeric_types):
3409             # If the original field is a string and matching comparisonvalue is
3410             # a number we should respect the origin of the original field
3411             # and process comparison value as a string (see
3412             # https://github.com/ytdl-org/youtube-dl/issues/11082)
3413             try:
3414                 numeric_comparison = int(comparison_value)
3415             except ValueError:
3416                 numeric_comparison = parse_filesize(comparison_value)
3417                 if numeric_comparison is None:
3418                     numeric_comparison = parse_filesize(f'{comparison_value}B')
3419                 if numeric_comparison is None:
3420                     numeric_comparison = parse_duration(comparison_value)
3421         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3422             raise ValueError('Operator %s only supports string values!' % m['op'])
3423         if actual_value is None:
3424             return is_incomplete(m['key']) or m['none_inclusive']
3425         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3426
3427     UNARY_OPERATORS = {
3428         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3429         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3430     }
3431     operator_rex = re.compile(r'''(?x)\s*
3432         (?P<op>%s)\s*(?P<key>[a-z_]+)
3433         \s*$
3434         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3435     m = operator_rex.search(filter_part)
3436     if m:
3437         op = UNARY_OPERATORS[m.group('op')]
3438         actual_value = dct.get(m.group('key'))
3439         if is_incomplete(m.group('key')) and actual_value is None:
3440             return True
3441         return op(actual_value)
3442
3443     raise ValueError('Invalid filter part %r' % filter_part)
3444
3445
3446 def match_str(filter_str, dct, incomplete=False):
3447     """ Filter a dictionary with a simple string syntax.
3448     @returns           Whether the filter passes
3449     @param incomplete  Set of keys that is expected to be missing from dct.
3450                        Can be True/False to indicate all/none of the keys may be missing.
3451                        All conditions on incomplete keys pass if the key is missing
3452     """
3453     return all(
3454         _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3455         for filter_part in re.split(r'(?<!\\)&', filter_str))
3456
3457
3458 def match_filter_func(filters):
3459     if not filters:
3460         return None
3461     filters = variadic(filters)
3462
3463     def _match_func(info_dict, *args, **kwargs):
3464         if any(match_str(f, info_dict, *args, **kwargs) for f in filters):
3465             return None
3466         else:
3467             video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3468             filter_str = ') | ('.join(map(str.strip, filters))
3469             return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3470     return _match_func
3471
3472
3473 def parse_dfxp_time_expr(time_expr):
3474     if not time_expr:
3475         return
3476
3477     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
3478     if mobj:
3479         return float(mobj.group('time_offset'))
3480
3481     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3482     if mobj:
3483         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3484
3485
3486 def srt_subtitles_timecode(seconds):
3487     return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3488
3489
3490 def ass_subtitles_timecode(seconds):
3491     time = timetuple_from_msec(seconds * 1000)
3492     return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3493
3494
3495 def dfxp2srt(dfxp_data):
3496     '''
3497     @param dfxp_data A bytes-like object containing DFXP data
3498     @returns A unicode object containing converted SRT data
3499     '''
3500     LEGACY_NAMESPACES = (
3501         (b'http://www.w3.org/ns/ttml', [
3502             b'http://www.w3.org/2004/11/ttaf1',
3503             b'http://www.w3.org/2006/04/ttaf1',
3504             b'http://www.w3.org/2006/10/ttaf1',
3505         ]),
3506         (b'http://www.w3.org/ns/ttml#styling', [
3507             b'http://www.w3.org/ns/ttml#style',
3508         ]),
3509     )
3510
3511     SUPPORTED_STYLING = [
3512         'color',
3513         'fontFamily',
3514         'fontSize',
3515         'fontStyle',
3516         'fontWeight',
3517         'textDecoration'
3518     ]
3519
3520     _x = functools.partial(xpath_with_ns, ns_map={
3521         'xml': 'http://www.w3.org/XML/1998/namespace',
3522         'ttml': 'http://www.w3.org/ns/ttml',
3523         'tts': 'http://www.w3.org/ns/ttml#styling',
3524     })
3525
3526     styles = {}
3527     default_style = {}
3528
3529     class TTMLPElementParser(object):
3530         _out = ''
3531         _unclosed_elements = []
3532         _applied_styles = []
3533
3534         def start(self, tag, attrib):
3535             if tag in (_x('ttml:br'), 'br'):
3536                 self._out += '\n'
3537             else:
3538                 unclosed_elements = []
3539                 style = {}
3540                 element_style_id = attrib.get('style')
3541                 if default_style:
3542                     style.update(default_style)
3543                 if element_style_id:
3544                     style.update(styles.get(element_style_id, {}))
3545                 for prop in SUPPORTED_STYLING:
3546                     prop_val = attrib.get(_x('tts:' + prop))
3547                     if prop_val:
3548                         style[prop] = prop_val
3549                 if style:
3550                     font = ''
3551                     for k, v in sorted(style.items()):
3552                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
3553                             continue
3554                         if k == 'color':
3555                             font += ' color="%s"' % v
3556                         elif k == 'fontSize':
3557                             font += ' size="%s"' % v
3558                         elif k == 'fontFamily':
3559                             font += ' face="%s"' % v
3560                         elif k == 'fontWeight' and v == 'bold':
3561                             self._out += '<b>'
3562                             unclosed_elements.append('b')
3563                         elif k == 'fontStyle' and v == 'italic':
3564                             self._out += '<i>'
3565                             unclosed_elements.append('i')
3566                         elif k == 'textDecoration' and v == 'underline':
3567                             self._out += '<u>'
3568                             unclosed_elements.append('u')
3569                     if font:
3570                         self._out += '<font' + font + '>'
3571                         unclosed_elements.append('font')
3572                     applied_style = {}
3573                     if self._applied_styles:
3574                         applied_style.update(self._applied_styles[-1])
3575                     applied_style.update(style)
3576                     self._applied_styles.append(applied_style)
3577                 self._unclosed_elements.append(unclosed_elements)
3578
3579         def end(self, tag):
3580             if tag not in (_x('ttml:br'), 'br'):
3581                 unclosed_elements = self._unclosed_elements.pop()
3582                 for element in reversed(unclosed_elements):
3583                     self._out += '</%s>' % element
3584                 if unclosed_elements and self._applied_styles:
3585                     self._applied_styles.pop()
3586
3587         def data(self, data):
3588             self._out += data
3589
3590         def close(self):
3591             return self._out.strip()
3592
3593     def parse_node(node):
3594         target = TTMLPElementParser()
3595         parser = xml.etree.ElementTree.XMLParser(target=target)
3596         parser.feed(xml.etree.ElementTree.tostring(node))
3597         return parser.close()
3598
3599     for k, v in LEGACY_NAMESPACES:
3600         for ns in v:
3601             dfxp_data = dfxp_data.replace(ns, k)
3602
3603     dfxp = compat_etree_fromstring(dfxp_data)
3604     out = []
3605     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3606
3607     if not paras:
3608         raise ValueError('Invalid dfxp/TTML subtitle')
3609
3610     repeat = False
3611     while True:
3612         for style in dfxp.findall(_x('.//ttml:style')):
3613             style_id = style.get('id') or style.get(_x('xml:id'))
3614             if not style_id:
3615                 continue
3616             parent_style_id = style.get('style')
3617             if parent_style_id:
3618                 if parent_style_id not in styles:
3619                     repeat = True
3620                     continue
3621                 styles[style_id] = styles[parent_style_id].copy()
3622             for prop in SUPPORTED_STYLING:
3623                 prop_val = style.get(_x('tts:' + prop))
3624                 if prop_val:
3625                     styles.setdefault(style_id, {})[prop] = prop_val
3626         if repeat:
3627             repeat = False
3628         else:
3629             break
3630
3631     for p in ('body', 'div'):
3632         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3633         if ele is None:
3634             continue
3635         style = styles.get(ele.get('style'))
3636         if not style:
3637             continue
3638         default_style.update(style)
3639
3640     for para, index in zip(paras, itertools.count(1)):
3641         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3642         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3643         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3644         if begin_time is None:
3645             continue
3646         if not end_time:
3647             if not dur:
3648                 continue
3649             end_time = begin_time + dur
3650         out.append('%d\n%s --> %s\n%s\n\n' % (
3651             index,
3652             srt_subtitles_timecode(begin_time),
3653             srt_subtitles_timecode(end_time),
3654             parse_node(para)))
3655
3656     return ''.join(out)
3657
3658
3659 def cli_option(params, command_option, param):
3660     param = params.get(param)
3661     if param:
3662         param = compat_str(param)
3663     return [command_option, param] if param is not None else []
3664
3665
3666 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3667     param = params.get(param)
3668     if param is None:
3669         return []
3670     assert isinstance(param, bool)
3671     if separator:
3672         return [command_option + separator + (true_value if param else false_value)]
3673     return [command_option, true_value if param else false_value]
3674
3675
3676 def cli_valueless_option(params, command_option, param, expected_value=True):
3677     param = params.get(param)
3678     return [command_option] if param == expected_value else []
3679
3680
3681 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3682     if isinstance(argdict, (list, tuple)):  # for backward compatibility
3683         if use_compat:
3684             return argdict
3685         else:
3686             argdict = None
3687     if argdict is None:
3688         return default
3689     assert isinstance(argdict, dict)
3690
3691     assert isinstance(keys, (list, tuple))
3692     for key_list in keys:
3693         arg_list = list(filter(
3694             lambda x: x is not None,
3695             [argdict.get(key.lower()) for key in variadic(key_list)]))
3696         if arg_list:
3697             return [arg for args in arg_list for arg in args]
3698     return default
3699
3700
3701 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3702     main_key, exe = main_key.lower(), exe.lower()
3703     root_key = exe if main_key == exe else f'{main_key}+{exe}'
3704     keys = [f'{root_key}{k}' for k in (keys or [''])]
3705     if root_key in keys:
3706         if main_key != exe:
3707             keys.append((main_key, exe))
3708         keys.append('default')
3709     else:
3710         use_compat = False
3711     return cli_configuration_args(argdict, keys, default, use_compat)
3712
3713
3714 class ISO639Utils(object):
3715     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3716     _lang_map = {
3717         'aa': 'aar',
3718         'ab': 'abk',
3719         'ae': 'ave',
3720         'af': 'afr',
3721         'ak': 'aka',
3722         'am': 'amh',
3723         'an': 'arg',
3724         'ar': 'ara',
3725         'as': 'asm',
3726         'av': 'ava',
3727         'ay': 'aym',
3728         'az': 'aze',
3729         'ba': 'bak',
3730         'be': 'bel',
3731         'bg': 'bul',
3732         'bh': 'bih',
3733         'bi': 'bis',
3734         'bm': 'bam',
3735         'bn': 'ben',
3736         'bo': 'bod',
3737         'br': 'bre',
3738         'bs': 'bos',
3739         'ca': 'cat',
3740         'ce': 'che',
3741         'ch': 'cha',
3742         'co': 'cos',
3743         'cr': 'cre',
3744         'cs': 'ces',
3745         'cu': 'chu',
3746         'cv': 'chv',
3747         'cy': 'cym',
3748         'da': 'dan',
3749         'de': 'deu',
3750         'dv': 'div',
3751         'dz': 'dzo',
3752         'ee': 'ewe',
3753         'el': 'ell',
3754         'en': 'eng',
3755         'eo': 'epo',
3756         'es': 'spa',
3757         'et': 'est',
3758         'eu': 'eus',
3759         'fa': 'fas',
3760         'ff': 'ful',
3761         'fi': 'fin',
3762         'fj': 'fij',
3763         'fo': 'fao',
3764         'fr': 'fra',
3765         'fy': 'fry',
3766         'ga': 'gle',
3767         'gd': 'gla',
3768         'gl': 'glg',
3769         'gn': 'grn',
3770         'gu': 'guj',
3771         'gv': 'glv',
3772         'ha': 'hau',
3773         'he': 'heb',
3774         'iw': 'heb',  # Replaced by he in 1989 revision
3775         'hi': 'hin',
3776         'ho': 'hmo',
3777         'hr': 'hrv',
3778         'ht': 'hat',
3779         'hu': 'hun',
3780         'hy': 'hye',
3781         'hz': 'her',
3782         'ia': 'ina',
3783         'id': 'ind',
3784         'in': 'ind',  # Replaced by id in 1989 revision
3785         'ie': 'ile',
3786         'ig': 'ibo',
3787         'ii': 'iii',
3788         'ik': 'ipk',
3789         'io': 'ido',
3790         'is': 'isl',
3791         'it': 'ita',
3792         'iu': 'iku',
3793         'ja': 'jpn',
3794         'jv': 'jav',
3795         'ka': 'kat',
3796         'kg': 'kon',
3797         'ki': 'kik',
3798         'kj': 'kua',
3799         'kk': 'kaz',
3800         'kl': 'kal',
3801         'km': 'khm',
3802         'kn': 'kan',
3803         'ko': 'kor',
3804         'kr': 'kau',
3805         'ks': 'kas',
3806         'ku': 'kur',
3807         'kv': 'kom',
3808         'kw': 'cor',
3809         'ky': 'kir',
3810         'la': 'lat',
3811         'lb': 'ltz',
3812         'lg': 'lug',
3813         'li': 'lim',
3814         'ln': 'lin',
3815         'lo': 'lao',
3816         'lt': 'lit',
3817         'lu': 'lub',
3818         'lv': 'lav',
3819         'mg': 'mlg',
3820         'mh': 'mah',
3821         'mi': 'mri',
3822         'mk': 'mkd',
3823         'ml': 'mal',
3824         'mn': 'mon',
3825         'mr': 'mar',
3826         'ms': 'msa',
3827         'mt': 'mlt',
3828         'my': 'mya',
3829         'na': 'nau',
3830         'nb': 'nob',
3831         'nd': 'nde',
3832         'ne': 'nep',
3833         'ng': 'ndo',
3834         'nl': 'nld',
3835         'nn': 'nno',
3836         'no': 'nor',
3837         'nr': 'nbl',
3838         'nv': 'nav',
3839         'ny': 'nya',
3840         'oc': 'oci',
3841         'oj': 'oji',
3842         'om': 'orm',
3843         'or': 'ori',
3844         'os': 'oss',
3845         'pa': 'pan',
3846         'pi': 'pli',
3847         'pl': 'pol',
3848         'ps': 'pus',
3849         'pt': 'por',
3850         'qu': 'que',
3851         'rm': 'roh',
3852         'rn': 'run',
3853         'ro': 'ron',
3854         'ru': 'rus',
3855         'rw': 'kin',
3856         'sa': 'san',
3857         'sc': 'srd',
3858         'sd': 'snd',
3859         'se': 'sme',
3860         'sg': 'sag',
3861         'si': 'sin',
3862         'sk': 'slk',
3863         'sl': 'slv',
3864         'sm': 'smo',
3865         'sn': 'sna',
3866         'so': 'som',
3867         'sq': 'sqi',
3868         'sr': 'srp',
3869         'ss': 'ssw',
3870         'st': 'sot',
3871         'su': 'sun',
3872         'sv': 'swe',
3873         'sw': 'swa',
3874         'ta': 'tam',
3875         'te': 'tel',
3876         'tg': 'tgk',
3877         'th': 'tha',
3878         'ti': 'tir',
3879         'tk': 'tuk',
3880         'tl': 'tgl',
3881         'tn': 'tsn',
3882         'to': 'ton',
3883         'tr': 'tur',
3884         'ts': 'tso',
3885         'tt': 'tat',
3886         'tw': 'twi',
3887         'ty': 'tah',
3888         'ug': 'uig',
3889         'uk': 'ukr',
3890         'ur': 'urd',
3891         'uz': 'uzb',
3892         've': 'ven',
3893         'vi': 'vie',
3894         'vo': 'vol',
3895         'wa': 'wln',
3896         'wo': 'wol',
3897         'xh': 'xho',
3898         'yi': 'yid',
3899         'ji': 'yid',  # Replaced by yi in 1989 revision
3900         'yo': 'yor',
3901         'za': 'zha',
3902         'zh': 'zho',
3903         'zu': 'zul',
3904     }
3905
3906     @classmethod
3907     def short2long(cls, code):
3908         """Convert language code from ISO 639-1 to ISO 639-2/T"""
3909         return cls._lang_map.get(code[:2])
3910
3911     @classmethod
3912     def long2short(cls, code):
3913         """Convert language code from ISO 639-2/T to ISO 639-1"""
3914         for short_name, long_name in cls._lang_map.items():
3915             if long_name == code:
3916                 return short_name
3917
3918
3919 class ISO3166Utils(object):
3920     # From http://data.okfn.org/data/core/country-list
3921     _country_map = {
3922         'AF': 'Afghanistan',
3923         'AX': 'Åland Islands',
3924         'AL': 'Albania',
3925         'DZ': 'Algeria',
3926         'AS': 'American Samoa',
3927         'AD': 'Andorra',
3928         'AO': 'Angola',
3929         'AI': 'Anguilla',
3930         'AQ': 'Antarctica',
3931         'AG': 'Antigua and Barbuda',
3932         'AR': 'Argentina',
3933         'AM': 'Armenia',
3934         'AW': 'Aruba',
3935         'AU': 'Australia',
3936         'AT': 'Austria',
3937         'AZ': 'Azerbaijan',
3938         'BS': 'Bahamas',
3939         'BH': 'Bahrain',
3940         'BD': 'Bangladesh',
3941         'BB': 'Barbados',
3942         'BY': 'Belarus',
3943         'BE': 'Belgium',
3944         'BZ': 'Belize',
3945         'BJ': 'Benin',
3946         'BM': 'Bermuda',
3947         'BT': 'Bhutan',
3948         'BO': 'Bolivia, Plurinational State of',
3949         'BQ': 'Bonaire, Sint Eustatius and Saba',
3950         'BA': 'Bosnia and Herzegovina',
3951         'BW': 'Botswana',
3952         'BV': 'Bouvet Island',
3953         'BR': 'Brazil',
3954         'IO': 'British Indian Ocean Territory',
3955         'BN': 'Brunei Darussalam',
3956         'BG': 'Bulgaria',
3957         'BF': 'Burkina Faso',
3958         'BI': 'Burundi',
3959         'KH': 'Cambodia',
3960         'CM': 'Cameroon',
3961         'CA': 'Canada',
3962         'CV': 'Cape Verde',
3963         'KY': 'Cayman Islands',
3964         'CF': 'Central African Republic',
3965         'TD': 'Chad',
3966         'CL': 'Chile',
3967         'CN': 'China',
3968         'CX': 'Christmas Island',
3969         'CC': 'Cocos (Keeling) Islands',
3970         'CO': 'Colombia',
3971         'KM': 'Comoros',
3972         'CG': 'Congo',
3973         'CD': 'Congo, the Democratic Republic of the',
3974         'CK': 'Cook Islands',
3975         'CR': 'Costa Rica',
3976         'CI': 'Côte d\'Ivoire',
3977         'HR': 'Croatia',
3978         'CU': 'Cuba',
3979         'CW': 'Curaçao',
3980         'CY': 'Cyprus',
3981         'CZ': 'Czech Republic',
3982         'DK': 'Denmark',
3983         'DJ': 'Djibouti',
3984         'DM': 'Dominica',
3985         'DO': 'Dominican Republic',
3986         'EC': 'Ecuador',
3987         'EG': 'Egypt',
3988         'SV': 'El Salvador',
3989         'GQ': 'Equatorial Guinea',
3990         'ER': 'Eritrea',
3991         'EE': 'Estonia',
3992         'ET': 'Ethiopia',
3993         'FK': 'Falkland Islands (Malvinas)',
3994         'FO': 'Faroe Islands',
3995         'FJ': 'Fiji',
3996         'FI': 'Finland',
3997         'FR': 'France',
3998         'GF': 'French Guiana',
3999         'PF': 'French Polynesia',
4000         'TF': 'French Southern Territories',
4001         'GA': 'Gabon',
4002         'GM': 'Gambia',
4003         'GE': 'Georgia',
4004         'DE': 'Germany',
4005         'GH': 'Ghana',
4006         'GI': 'Gibraltar',
4007         'GR': 'Greece',
4008         'GL': 'Greenland',
4009         'GD': 'Grenada',
4010         'GP': 'Guadeloupe',
4011         'GU': 'Guam',
4012         'GT': 'Guatemala',
4013         'GG': 'Guernsey',
4014         'GN': 'Guinea',
4015         'GW': 'Guinea-Bissau',
4016         'GY': 'Guyana',
4017         'HT': 'Haiti',
4018         'HM': 'Heard Island and McDonald Islands',
4019         'VA': 'Holy See (Vatican City State)',
4020         'HN': 'Honduras',
4021         'HK': 'Hong Kong',
4022         'HU': 'Hungary',
4023         'IS': 'Iceland',
4024         'IN': 'India',
4025         'ID': 'Indonesia',
4026         'IR': 'Iran, Islamic Republic of',
4027         'IQ': 'Iraq',
4028         'IE': 'Ireland',
4029         'IM': 'Isle of Man',
4030         'IL': 'Israel',
4031         'IT': 'Italy',
4032         'JM': 'Jamaica',
4033         'JP': 'Japan',
4034         'JE': 'Jersey',
4035         'JO': 'Jordan',
4036         'KZ': 'Kazakhstan',
4037         'KE': 'Kenya',
4038         'KI': 'Kiribati',
4039         'KP': 'Korea, Democratic People\'s Republic of',
4040         'KR': 'Korea, Republic of',
4041         'KW': 'Kuwait',
4042         'KG': 'Kyrgyzstan',
4043         'LA': 'Lao People\'s Democratic Republic',
4044         'LV': 'Latvia',
4045         'LB': 'Lebanon',
4046         'LS': 'Lesotho',
4047         'LR': 'Liberia',
4048         'LY': 'Libya',
4049         'LI': 'Liechtenstein',
4050         'LT': 'Lithuania',
4051         'LU': 'Luxembourg',
4052         'MO': 'Macao',
4053         'MK': 'Macedonia, the Former Yugoslav Republic of',
4054         'MG': 'Madagascar',
4055         'MW': 'Malawi',
4056         'MY': 'Malaysia',
4057         'MV': 'Maldives',
4058         'ML': 'Mali',
4059         'MT': 'Malta',
4060         'MH': 'Marshall Islands',
4061         'MQ': 'Martinique',
4062         'MR': 'Mauritania',
4063         'MU': 'Mauritius',
4064         'YT': 'Mayotte',
4065         'MX': 'Mexico',
4066         'FM': 'Micronesia, Federated States of',
4067         'MD': 'Moldova, Republic of',
4068         'MC': 'Monaco',
4069         'MN': 'Mongolia',
4070         'ME': 'Montenegro',
4071         'MS': 'Montserrat',
4072         'MA': 'Morocco',
4073         'MZ': 'Mozambique',
4074         'MM': 'Myanmar',
4075         'NA': 'Namibia',
4076         'NR': 'Nauru',
4077         'NP': 'Nepal',
4078         'NL': 'Netherlands',
4079         'NC': 'New Caledonia',
4080         'NZ': 'New Zealand',
4081         'NI': 'Nicaragua',
4082         'NE': 'Niger',
4083         'NG': 'Nigeria',
4084         'NU': 'Niue',
4085         'NF': 'Norfolk Island',
4086         'MP': 'Northern Mariana Islands',
4087         'NO': 'Norway',
4088         'OM': 'Oman',
4089         'PK': 'Pakistan',
4090         'PW': 'Palau',
4091         'PS': 'Palestine, State of',
4092         'PA': 'Panama',
4093         'PG': 'Papua New Guinea',
4094         'PY': 'Paraguay',
4095         'PE': 'Peru',
4096         'PH': 'Philippines',
4097         'PN': 'Pitcairn',
4098         'PL': 'Poland',
4099         'PT': 'Portugal',
4100         'PR': 'Puerto Rico',
4101         'QA': 'Qatar',
4102         'RE': 'Réunion',
4103         'RO': 'Romania',
4104         'RU': 'Russian Federation',
4105         'RW': 'Rwanda',
4106         'BL': 'Saint Barthélemy',
4107         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4108         'KN': 'Saint Kitts and Nevis',
4109         'LC': 'Saint Lucia',
4110         'MF': 'Saint Martin (French part)',
4111         'PM': 'Saint Pierre and Miquelon',
4112         'VC': 'Saint Vincent and the Grenadines',
4113         'WS': 'Samoa',
4114         'SM': 'San Marino',
4115         'ST': 'Sao Tome and Principe',
4116         'SA': 'Saudi Arabia',
4117         'SN': 'Senegal',
4118         'RS': 'Serbia',
4119         'SC': 'Seychelles',
4120         'SL': 'Sierra Leone',
4121         'SG': 'Singapore',
4122         'SX': 'Sint Maarten (Dutch part)',
4123         'SK': 'Slovakia',
4124         'SI': 'Slovenia',
4125         'SB': 'Solomon Islands',
4126         'SO': 'Somalia',
4127         'ZA': 'South Africa',
4128         'GS': 'South Georgia and the South Sandwich Islands',
4129         'SS': 'South Sudan',
4130         'ES': 'Spain',
4131         'LK': 'Sri Lanka',
4132         'SD': 'Sudan',
4133         'SR': 'Suriname',
4134         'SJ': 'Svalbard and Jan Mayen',
4135         'SZ': 'Swaziland',
4136         'SE': 'Sweden',
4137         'CH': 'Switzerland',
4138         'SY': 'Syrian Arab Republic',
4139         'TW': 'Taiwan, Province of China',
4140         'TJ': 'Tajikistan',
4141         'TZ': 'Tanzania, United Republic of',
4142         'TH': 'Thailand',
4143         'TL': 'Timor-Leste',
4144         'TG': 'Togo',
4145         'TK': 'Tokelau',
4146         'TO': 'Tonga',
4147         'TT': 'Trinidad and Tobago',
4148         'TN': 'Tunisia',
4149         'TR': 'Turkey',
4150         'TM': 'Turkmenistan',
4151         'TC': 'Turks and Caicos Islands',
4152         'TV': 'Tuvalu',
4153         'UG': 'Uganda',
4154         'UA': 'Ukraine',
4155         'AE': 'United Arab Emirates',
4156         'GB': 'United Kingdom',
4157         'US': 'United States',
4158         'UM': 'United States Minor Outlying Islands',
4159         'UY': 'Uruguay',
4160         'UZ': 'Uzbekistan',
4161         'VU': 'Vanuatu',
4162         'VE': 'Venezuela, Bolivarian Republic of',
4163         'VN': 'Viet Nam',
4164         'VG': 'Virgin Islands, British',
4165         'VI': 'Virgin Islands, U.S.',
4166         'WF': 'Wallis and Futuna',
4167         'EH': 'Western Sahara',
4168         'YE': 'Yemen',
4169         'ZM': 'Zambia',
4170         'ZW': 'Zimbabwe',
4171     }
4172
4173     @classmethod
4174     def short2full(cls, code):
4175         """Convert an ISO 3166-2 country code to the corresponding full name"""
4176         return cls._country_map.get(code.upper())
4177
4178
4179 class GeoUtils(object):
4180     # Major IPv4 address blocks per country
4181     _country_ip_map = {
4182         'AD': '46.172.224.0/19',
4183         'AE': '94.200.0.0/13',
4184         'AF': '149.54.0.0/17',
4185         'AG': '209.59.64.0/18',
4186         'AI': '204.14.248.0/21',
4187         'AL': '46.99.0.0/16',
4188         'AM': '46.70.0.0/15',
4189         'AO': '105.168.0.0/13',
4190         'AP': '182.50.184.0/21',
4191         'AQ': '23.154.160.0/24',
4192         'AR': '181.0.0.0/12',
4193         'AS': '202.70.112.0/20',
4194         'AT': '77.116.0.0/14',
4195         'AU': '1.128.0.0/11',
4196         'AW': '181.41.0.0/18',
4197         'AX': '185.217.4.0/22',
4198         'AZ': '5.197.0.0/16',
4199         'BA': '31.176.128.0/17',
4200         'BB': '65.48.128.0/17',
4201         'BD': '114.130.0.0/16',
4202         'BE': '57.0.0.0/8',
4203         'BF': '102.178.0.0/15',
4204         'BG': '95.42.0.0/15',
4205         'BH': '37.131.0.0/17',
4206         'BI': '154.117.192.0/18',
4207         'BJ': '137.255.0.0/16',
4208         'BL': '185.212.72.0/23',
4209         'BM': '196.12.64.0/18',
4210         'BN': '156.31.0.0/16',
4211         'BO': '161.56.0.0/16',
4212         'BQ': '161.0.80.0/20',
4213         'BR': '191.128.0.0/12',
4214         'BS': '24.51.64.0/18',
4215         'BT': '119.2.96.0/19',
4216         'BW': '168.167.0.0/16',
4217         'BY': '178.120.0.0/13',
4218         'BZ': '179.42.192.0/18',
4219         'CA': '99.224.0.0/11',
4220         'CD': '41.243.0.0/16',
4221         'CF': '197.242.176.0/21',
4222         'CG': '160.113.0.0/16',
4223         'CH': '85.0.0.0/13',
4224         'CI': '102.136.0.0/14',
4225         'CK': '202.65.32.0/19',
4226         'CL': '152.172.0.0/14',
4227         'CM': '102.244.0.0/14',
4228         'CN': '36.128.0.0/10',
4229         'CO': '181.240.0.0/12',
4230         'CR': '201.192.0.0/12',
4231         'CU': '152.206.0.0/15',
4232         'CV': '165.90.96.0/19',
4233         'CW': '190.88.128.0/17',
4234         'CY': '31.153.0.0/16',
4235         'CZ': '88.100.0.0/14',
4236         'DE': '53.0.0.0/8',
4237         'DJ': '197.241.0.0/17',
4238         'DK': '87.48.0.0/12',
4239         'DM': '192.243.48.0/20',
4240         'DO': '152.166.0.0/15',
4241         'DZ': '41.96.0.0/12',
4242         'EC': '186.68.0.0/15',
4243         'EE': '90.190.0.0/15',
4244         'EG': '156.160.0.0/11',
4245         'ER': '196.200.96.0/20',
4246         'ES': '88.0.0.0/11',
4247         'ET': '196.188.0.0/14',
4248         'EU': '2.16.0.0/13',
4249         'FI': '91.152.0.0/13',
4250         'FJ': '144.120.0.0/16',
4251         'FK': '80.73.208.0/21',
4252         'FM': '119.252.112.0/20',
4253         'FO': '88.85.32.0/19',
4254         'FR': '90.0.0.0/9',
4255         'GA': '41.158.0.0/15',
4256         'GB': '25.0.0.0/8',
4257         'GD': '74.122.88.0/21',
4258         'GE': '31.146.0.0/16',
4259         'GF': '161.22.64.0/18',
4260         'GG': '62.68.160.0/19',
4261         'GH': '154.160.0.0/12',
4262         'GI': '95.164.0.0/16',
4263         'GL': '88.83.0.0/19',
4264         'GM': '160.182.0.0/15',
4265         'GN': '197.149.192.0/18',
4266         'GP': '104.250.0.0/19',
4267         'GQ': '105.235.224.0/20',
4268         'GR': '94.64.0.0/13',
4269         'GT': '168.234.0.0/16',
4270         'GU': '168.123.0.0/16',
4271         'GW': '197.214.80.0/20',
4272         'GY': '181.41.64.0/18',
4273         'HK': '113.252.0.0/14',
4274         'HN': '181.210.0.0/16',
4275         'HR': '93.136.0.0/13',
4276         'HT': '148.102.128.0/17',
4277         'HU': '84.0.0.0/14',
4278         'ID': '39.192.0.0/10',
4279         'IE': '87.32.0.0/12',
4280         'IL': '79.176.0.0/13',
4281         'IM': '5.62.80.0/20',
4282         'IN': '117.192.0.0/10',
4283         'IO': '203.83.48.0/21',
4284         'IQ': '37.236.0.0/14',
4285         'IR': '2.176.0.0/12',
4286         'IS': '82.221.0.0/16',
4287         'IT': '79.0.0.0/10',
4288         'JE': '87.244.64.0/18',
4289         'JM': '72.27.0.0/17',
4290         'JO': '176.29.0.0/16',
4291         'JP': '133.0.0.0/8',
4292         'KE': '105.48.0.0/12',
4293         'KG': '158.181.128.0/17',
4294         'KH': '36.37.128.0/17',
4295         'KI': '103.25.140.0/22',
4296         'KM': '197.255.224.0/20',
4297         'KN': '198.167.192.0/19',
4298         'KP': '175.45.176.0/22',
4299         'KR': '175.192.0.0/10',
4300         'KW': '37.36.0.0/14',
4301         'KY': '64.96.0.0/15',
4302         'KZ': '2.72.0.0/13',
4303         'LA': '115.84.64.0/18',
4304         'LB': '178.135.0.0/16',
4305         'LC': '24.92.144.0/20',
4306         'LI': '82.117.0.0/19',
4307         'LK': '112.134.0.0/15',
4308         'LR': '102.183.0.0/16',
4309         'LS': '129.232.0.0/17',
4310         'LT': '78.56.0.0/13',
4311         'LU': '188.42.0.0/16',
4312         'LV': '46.109.0.0/16',
4313         'LY': '41.252.0.0/14',
4314         'MA': '105.128.0.0/11',
4315         'MC': '88.209.64.0/18',
4316         'MD': '37.246.0.0/16',
4317         'ME': '178.175.0.0/17',
4318         'MF': '74.112.232.0/21',
4319         'MG': '154.126.0.0/17',
4320         'MH': '117.103.88.0/21',
4321         'MK': '77.28.0.0/15',
4322         'ML': '154.118.128.0/18',
4323         'MM': '37.111.0.0/17',
4324         'MN': '49.0.128.0/17',
4325         'MO': '60.246.0.0/16',
4326         'MP': '202.88.64.0/20',
4327         'MQ': '109.203.224.0/19',
4328         'MR': '41.188.64.0/18',
4329         'MS': '208.90.112.0/22',
4330         'MT': '46.11.0.0/16',
4331         'MU': '105.16.0.0/12',
4332         'MV': '27.114.128.0/18',
4333         'MW': '102.70.0.0/15',
4334         'MX': '187.192.0.0/11',
4335         'MY': '175.136.0.0/13',
4336         'MZ': '197.218.0.0/15',
4337         'NA': '41.182.0.0/16',
4338         'NC': '101.101.0.0/18',
4339         'NE': '197.214.0.0/18',
4340         'NF': '203.17.240.0/22',
4341         'NG': '105.112.0.0/12',
4342         'NI': '186.76.0.0/15',
4343         'NL': '145.96.0.0/11',
4344         'NO': '84.208.0.0/13',
4345         'NP': '36.252.0.0/15',
4346         'NR': '203.98.224.0/19',
4347         'NU': '49.156.48.0/22',
4348         'NZ': '49.224.0.0/14',
4349         'OM': '5.36.0.0/15',
4350         'PA': '186.72.0.0/15',
4351         'PE': '186.160.0.0/14',
4352         'PF': '123.50.64.0/18',
4353         'PG': '124.240.192.0/19',
4354         'PH': '49.144.0.0/13',
4355         'PK': '39.32.0.0/11',
4356         'PL': '83.0.0.0/11',
4357         'PM': '70.36.0.0/20',
4358         'PR': '66.50.0.0/16',
4359         'PS': '188.161.0.0/16',
4360         'PT': '85.240.0.0/13',
4361         'PW': '202.124.224.0/20',
4362         'PY': '181.120.0.0/14',
4363         'QA': '37.210.0.0/15',
4364         'RE': '102.35.0.0/16',
4365         'RO': '79.112.0.0/13',
4366         'RS': '93.86.0.0/15',
4367         'RU': '5.136.0.0/13',
4368         'RW': '41.186.0.0/16',
4369         'SA': '188.48.0.0/13',
4370         'SB': '202.1.160.0/19',
4371         'SC': '154.192.0.0/11',
4372         'SD': '102.120.0.0/13',
4373         'SE': '78.64.0.0/12',
4374         'SG': '8.128.0.0/10',
4375         'SI': '188.196.0.0/14',
4376         'SK': '78.98.0.0/15',
4377         'SL': '102.143.0.0/17',
4378         'SM': '89.186.32.0/19',
4379         'SN': '41.82.0.0/15',
4380         'SO': '154.115.192.0/18',
4381         'SR': '186.179.128.0/17',
4382         'SS': '105.235.208.0/21',
4383         'ST': '197.159.160.0/19',
4384         'SV': '168.243.0.0/16',
4385         'SX': '190.102.0.0/20',
4386         'SY': '5.0.0.0/16',
4387         'SZ': '41.84.224.0/19',
4388         'TC': '65.255.48.0/20',
4389         'TD': '154.68.128.0/19',
4390         'TG': '196.168.0.0/14',
4391         'TH': '171.96.0.0/13',
4392         'TJ': '85.9.128.0/18',
4393         'TK': '27.96.24.0/21',
4394         'TL': '180.189.160.0/20',
4395         'TM': '95.85.96.0/19',
4396         'TN': '197.0.0.0/11',
4397         'TO': '175.176.144.0/21',
4398         'TR': '78.160.0.0/11',
4399         'TT': '186.44.0.0/15',
4400         'TV': '202.2.96.0/19',
4401         'TW': '120.96.0.0/11',
4402         'TZ': '156.156.0.0/14',
4403         'UA': '37.52.0.0/14',
4404         'UG': '102.80.0.0/13',
4405         'US': '6.0.0.0/8',
4406         'UY': '167.56.0.0/13',
4407         'UZ': '84.54.64.0/18',
4408         'VA': '212.77.0.0/19',
4409         'VC': '207.191.240.0/21',
4410         'VE': '186.88.0.0/13',
4411         'VG': '66.81.192.0/20',
4412         'VI': '146.226.0.0/16',
4413         'VN': '14.160.0.0/11',
4414         'VU': '202.80.32.0/20',
4415         'WF': '117.20.32.0/21',
4416         'WS': '202.4.32.0/19',
4417         'YE': '134.35.0.0/16',
4418         'YT': '41.242.116.0/22',
4419         'ZA': '41.0.0.0/11',
4420         'ZM': '102.144.0.0/13',
4421         'ZW': '102.177.192.0/18',
4422     }
4423
4424     @classmethod
4425     def random_ipv4(cls, code_or_block):
4426         if len(code_or_block) == 2:
4427             block = cls._country_ip_map.get(code_or_block.upper())
4428             if not block:
4429                 return None
4430         else:
4431             block = code_or_block
4432         addr, preflen = block.split('/')
4433         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4434         addr_max = addr_min | (0xffffffff >> int(preflen))
4435         return compat_str(socket.inet_ntoa(
4436             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4437
4438
4439 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4440     def __init__(self, proxies=None):
4441         # Set default handlers
4442         for type in ('http', 'https'):
4443             setattr(self, '%s_open' % type,
4444                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4445                         meth(r, proxy, type))
4446         compat_urllib_request.ProxyHandler.__init__(self, proxies)
4447
4448     def proxy_open(self, req, proxy, type):
4449         req_proxy = req.headers.get('Ytdl-request-proxy')
4450         if req_proxy is not None:
4451             proxy = req_proxy
4452             del req.headers['Ytdl-request-proxy']
4453
4454         if proxy == '__noproxy__':
4455             return None  # No Proxy
4456         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4457             req.add_header('Ytdl-socks-proxy', proxy)
4458             # yt-dlp's http/https handlers do wrapping the socket with socks
4459             return None
4460         return compat_urllib_request.ProxyHandler.proxy_open(
4461             self, req, proxy, type)
4462
4463
4464 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4465 # released into Public Domain
4466 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4467
4468 def long_to_bytes(n, blocksize=0):
4469     """long_to_bytes(n:long, blocksize:int) : string
4470     Convert a long integer to a byte string.
4471
4472     If optional blocksize is given and greater than zero, pad the front of the
4473     byte string with binary zeros so that the length is a multiple of
4474     blocksize.
4475     """
4476     # after much testing, this algorithm was deemed to be the fastest
4477     s = b''
4478     n = int(n)
4479     while n > 0:
4480         s = compat_struct_pack('>I', n & 0xffffffff) + s
4481         n = n >> 32
4482     # strip off leading zeros
4483     for i in range(len(s)):
4484         if s[i] != b'\000'[0]:
4485             break
4486     else:
4487         # only happens when n == 0
4488         s = b'\000'
4489         i = 0
4490     s = s[i:]
4491     # add back some pad bytes.  this could be done more efficiently w.r.t. the
4492     # de-padding being done above, but sigh...
4493     if blocksize > 0 and len(s) % blocksize:
4494         s = (blocksize - len(s) % blocksize) * b'\000' + s
4495     return s
4496
4497
4498 def bytes_to_long(s):
4499     """bytes_to_long(string) : long
4500     Convert a byte string to a long integer.
4501
4502     This is (essentially) the inverse of long_to_bytes().
4503     """
4504     acc = 0
4505     length = len(s)
4506     if length % 4:
4507         extra = (4 - length % 4)
4508         s = b'\000' * extra + s
4509         length = length + extra
4510     for i in range(0, length, 4):
4511         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4512     return acc
4513
4514
4515 def ohdave_rsa_encrypt(data, exponent, modulus):
4516     '''
4517     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4518
4519     Input:
4520         data: data to encrypt, bytes-like object
4521         exponent, modulus: parameter e and N of RSA algorithm, both integer
4522     Output: hex string of encrypted data
4523
4524     Limitation: supports one block encryption only
4525     '''
4526
4527     payload = int(binascii.hexlify(data[::-1]), 16)
4528     encrypted = pow(payload, exponent, modulus)
4529     return '%x' % encrypted
4530
4531
4532 def pkcs1pad(data, length):
4533     """
4534     Padding input data with PKCS#1 scheme
4535
4536     @param {int[]} data        input data
4537     @param {int}   length      target length
4538     @returns {int[]}           padded data
4539     """
4540     if len(data) > length - 11:
4541         raise ValueError('Input data too long for PKCS#1 padding')
4542
4543     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
4544     return [0, 2] + pseudo_random + [0] + data
4545
4546
4547 def encode_base_n(num, n, table=None):
4548     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4549     if not table:
4550         table = FULL_TABLE[:n]
4551
4552     if n > len(table):
4553         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4554
4555     if num == 0:
4556         return table[0]
4557
4558     ret = ''
4559     while num:
4560         ret = table[num % n] + ret
4561         num = num // n
4562     return ret
4563
4564
4565 def decode_packed_codes(code):
4566     mobj = re.search(PACKED_CODES_RE, code)
4567     obfuscated_code, base, count, symbols = mobj.groups()
4568     base = int(base)
4569     count = int(count)
4570     symbols = symbols.split('|')
4571     symbol_table = {}
4572
4573     while count:
4574         count -= 1
4575         base_n_count = encode_base_n(count, base)
4576         symbol_table[base_n_count] = symbols[count] or base_n_count
4577
4578     return re.sub(
4579         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4580         obfuscated_code)
4581
4582
4583 def caesar(s, alphabet, shift):
4584     if shift == 0:
4585         return s
4586     l = len(alphabet)
4587     return ''.join(
4588         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4589         for c in s)
4590
4591
4592 def rot47(s):
4593     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4594
4595
4596 def parse_m3u8_attributes(attrib):
4597     info = {}
4598     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4599         if val.startswith('"'):
4600             val = val[1:-1]
4601         info[key] = val
4602     return info
4603
4604
4605 def urshift(val, n):
4606     return val >> n if val >= 0 else (val + 0x100000000) >> n
4607
4608
4609 # Based on png2str() written by @gdkchan and improved by @yokrysty
4610 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4611 def decode_png(png_data):
4612     # Reference: https://www.w3.org/TR/PNG/
4613     header = png_data[8:]
4614
4615     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4616         raise IOError('Not a valid PNG file.')
4617
4618     int_map = {1: '>B', 2: '>H', 4: '>I'}
4619     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4620
4621     chunks = []
4622
4623     while header:
4624         length = unpack_integer(header[:4])
4625         header = header[4:]
4626
4627         chunk_type = header[:4]
4628         header = header[4:]
4629
4630         chunk_data = header[:length]
4631         header = header[length:]
4632
4633         header = header[4:]  # Skip CRC
4634
4635         chunks.append({
4636             'type': chunk_type,
4637             'length': length,
4638             'data': chunk_data
4639         })
4640
4641     ihdr = chunks[0]['data']
4642
4643     width = unpack_integer(ihdr[:4])
4644     height = unpack_integer(ihdr[4:8])
4645
4646     idat = b''
4647
4648     for chunk in chunks:
4649         if chunk['type'] == b'IDAT':
4650             idat += chunk['data']
4651
4652     if not idat:
4653         raise IOError('Unable to read PNG data.')
4654
4655     decompressed_data = bytearray(zlib.decompress(idat))
4656
4657     stride = width * 3
4658     pixels = []
4659
4660     def _get_pixel(idx):
4661         x = idx % stride
4662         y = idx // stride
4663         return pixels[y][x]
4664
4665     for y in range(height):
4666         basePos = y * (1 + stride)
4667         filter_type = decompressed_data[basePos]
4668
4669         current_row = []
4670
4671         pixels.append(current_row)
4672
4673         for x in range(stride):
4674             color = decompressed_data[1 + basePos + x]
4675             basex = y * stride + x
4676             left = 0
4677             up = 0
4678
4679             if x > 2:
4680                 left = _get_pixel(basex - 3)
4681             if y > 0:
4682                 up = _get_pixel(basex - stride)
4683
4684             if filter_type == 1:  # Sub
4685                 color = (color + left) & 0xff
4686             elif filter_type == 2:  # Up
4687                 color = (color + up) & 0xff
4688             elif filter_type == 3:  # Average
4689                 color = (color + ((left + up) >> 1)) & 0xff
4690             elif filter_type == 4:  # Paeth
4691                 a = left
4692                 b = up
4693                 c = 0
4694
4695                 if x > 2 and y > 0:
4696                     c = _get_pixel(basex - stride - 3)
4697
4698                 p = a + b - c
4699
4700                 pa = abs(p - a)
4701                 pb = abs(p - b)
4702                 pc = abs(p - c)
4703
4704                 if pa <= pb and pa <= pc:
4705                     color = (color + a) & 0xff
4706                 elif pb <= pc:
4707                     color = (color + b) & 0xff
4708                 else:
4709                     color = (color + c) & 0xff
4710
4711             current_row.append(color)
4712
4713     return width, height, pixels
4714
4715
4716 def write_xattr(path, key, value):
4717     # This mess below finds the best xattr tool for the job
4718     try:
4719         # try the pyxattr module...
4720         import xattr
4721
4722         if hasattr(xattr, 'set'):  # pyxattr
4723             # Unicode arguments are not supported in python-pyxattr until
4724             # version 0.5.0
4725             # See https://github.com/ytdl-org/youtube-dl/issues/5498
4726             pyxattr_required_version = '0.5.0'
4727             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
4728                 # TODO: fallback to CLI tools
4729                 raise XAttrUnavailableError(
4730                     'python-pyxattr is detected but is too old. '
4731                     'yt-dlp requires %s or above while your version is %s. '
4732                     'Falling back to other xattr implementations' % (
4733                         pyxattr_required_version, xattr.__version__))
4734
4735             setxattr = xattr.set
4736         else:  # xattr
4737             setxattr = xattr.setxattr
4738
4739         try:
4740             setxattr(path, key, value)
4741         except EnvironmentError as e:
4742             raise XAttrMetadataError(e.errno, e.strerror)
4743
4744     except ImportError:
4745         if compat_os_name == 'nt':
4746             # Write xattrs to NTFS Alternate Data Streams:
4747             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4748             assert ':' not in key
4749             assert os.path.exists(path)
4750
4751             ads_fn = path + ':' + key
4752             try:
4753                 with open(ads_fn, 'wb') as f:
4754                     f.write(value)
4755             except EnvironmentError as e:
4756                 raise XAttrMetadataError(e.errno, e.strerror)
4757         else:
4758             user_has_setfattr = check_executable('setfattr', ['--version'])
4759             user_has_xattr = check_executable('xattr', ['-h'])
4760
4761             if user_has_setfattr or user_has_xattr:
4762
4763                 value = value.decode('utf-8')
4764                 if user_has_setfattr:
4765                     executable = 'setfattr'
4766                     opts = ['-n', key, '-v', value]
4767                 elif user_has_xattr:
4768                     executable = 'xattr'
4769                     opts = ['-w', key, value]
4770
4771                 cmd = ([encodeFilename(executable, True)]
4772                        + [encodeArgument(o) for o in opts]
4773                        + [encodeFilename(path, True)])
4774
4775                 try:
4776                     p = Popen(
4777                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4778                 except EnvironmentError as e:
4779                     raise XAttrMetadataError(e.errno, e.strerror)
4780                 stdout, stderr = p.communicate_or_kill()
4781                 stderr = stderr.decode('utf-8', 'replace')
4782                 if p.returncode != 0:
4783                     raise XAttrMetadataError(p.returncode, stderr)
4784
4785             else:
4786                 # On Unix, and can't find pyxattr, setfattr, or xattr.
4787                 if sys.platform.startswith('linux'):
4788                     raise XAttrUnavailableError(
4789                         "Couldn't find a tool to set the xattrs. "
4790                         "Install either the python 'pyxattr' or 'xattr' "
4791                         "modules, or the GNU 'attr' package "
4792                         "(which contains the 'setfattr' tool).")
4793                 else:
4794                     raise XAttrUnavailableError(
4795                         "Couldn't find a tool to set the xattrs. "
4796                         "Install either the python 'xattr' module, "
4797                         "or the 'xattr' binary.")
4798
4799
4800 def random_birthday(year_field, month_field, day_field):
4801     start_date = datetime.date(1950, 1, 1)
4802     end_date = datetime.date(1995, 12, 31)
4803     offset = random.randint(0, (end_date - start_date).days)
4804     random_date = start_date + datetime.timedelta(offset)
4805     return {
4806         year_field: str(random_date.year),
4807         month_field: str(random_date.month),
4808         day_field: str(random_date.day),
4809     }
4810
4811
4812 # Templates for internet shortcut files, which are plain text files.
4813 DOT_URL_LINK_TEMPLATE = '''
4814 [InternetShortcut]
4815 URL=%(url)s
4816 '''.lstrip()
4817
4818 DOT_WEBLOC_LINK_TEMPLATE = '''
4819 <?xml version="1.0" encoding="UTF-8"?>
4820 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4821 <plist version="1.0">
4822 <dict>
4823 \t<key>URL</key>
4824 \t<string>%(url)s</string>
4825 </dict>
4826 </plist>
4827 '''.lstrip()
4828
4829 DOT_DESKTOP_LINK_TEMPLATE = '''
4830 [Desktop Entry]
4831 Encoding=UTF-8
4832 Name=%(filename)s
4833 Type=Link
4834 URL=%(url)s
4835 Icon=text-html
4836 '''.lstrip()
4837
4838 LINK_TEMPLATES = {
4839     'url': DOT_URL_LINK_TEMPLATE,
4840     'desktop': DOT_DESKTOP_LINK_TEMPLATE,
4841     'webloc': DOT_WEBLOC_LINK_TEMPLATE,
4842 }
4843
4844
4845 def iri_to_uri(iri):
4846     """
4847     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4848
4849     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
4850     """
4851
4852     iri_parts = compat_urllib_parse_urlparse(iri)
4853
4854     if '[' in iri_parts.netloc:
4855         raise ValueError('IPv6 URIs are not, yet, supported.')
4856         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4857
4858     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4859
4860     net_location = ''
4861     if iri_parts.username:
4862         net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
4863         if iri_parts.password is not None:
4864             net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
4865         net_location += '@'
4866
4867     net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
4868     # The 'idna' encoding produces ASCII text.
4869     if iri_parts.port is not None and iri_parts.port != 80:
4870         net_location += ':' + str(iri_parts.port)
4871
4872     return compat_urllib_parse_urlunparse(
4873         (iri_parts.scheme,
4874             net_location,
4875
4876             compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
4877
4878             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4879             compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
4880
4881             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4882             compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
4883
4884             compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
4885
4886     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
4887
4888
4889 def to_high_limit_path(path):
4890     if sys.platform in ['win32', 'cygwin']:
4891         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
4892         return r'\\?\ '.rstrip() + os.path.abspath(path)
4893
4894     return path
4895
4896
4897 def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
4898     val = traverse_obj(obj, *variadic(field))
4899     if val in ignore:
4900         return default
4901     return template % (func(val) if func else val)
4902
4903
4904 def clean_podcast_url(url):
4905     return re.sub(r'''(?x)
4906         (?:
4907             (?:
4908                 chtbl\.com/track|
4909                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4910                 play\.podtrac\.com
4911             )/[^/]+|
4912             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4913             flex\.acast\.com|
4914             pd(?:
4915                 cn\.co| # https://podcorn.com/analytics-prefix/
4916                 st\.fm # https://podsights.com/docs/
4917             )/e
4918         )/''', '', url)
4919
4920
4921 _HEX_TABLE = '0123456789abcdef'
4922
4923
4924 def random_uuidv4():
4925     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
4926
4927
4928 def make_dir(path, to_screen=None):
4929     try:
4930         dn = os.path.dirname(path)
4931         if dn and not os.path.exists(dn):
4932             os.makedirs(dn)
4933         return True
4934     except (OSError, IOError) as err:
4935         if callable(to_screen) is not None:
4936             to_screen('unable to create directory ' + error_to_compat_str(err))
4937         return False
4938
4939
4940 def get_executable_path():
4941     from zipimport import zipimporter
4942     if hasattr(sys, 'frozen'):  # Running from PyInstaller
4943         path = os.path.dirname(sys.executable)
4944     elif isinstance(__loader__, zipimporter):  # Running from ZIP
4945         path = os.path.join(os.path.dirname(__file__), '../..')
4946     else:
4947         path = os.path.join(os.path.dirname(__file__), '..')
4948     return os.path.abspath(path)
4949
4950
4951 def load_plugins(name, suffix, namespace):
4952     classes = {}
4953     try:
4954         plugins_spec = importlib.util.spec_from_file_location(
4955             name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
4956         plugins = importlib.util.module_from_spec(plugins_spec)
4957         sys.modules[plugins_spec.name] = plugins
4958         plugins_spec.loader.exec_module(plugins)
4959         for name in dir(plugins):
4960             if name in namespace:
4961                 continue
4962             if not name.endswith(suffix):
4963                 continue
4964             klass = getattr(plugins, name)
4965             classes[name] = namespace[name] = klass
4966     except FileNotFoundError:
4967         pass
4968     return classes
4969
4970
4971 def traverse_obj(
4972         obj, *path_list, default=None, expected_type=None, get_all=True,
4973         casesense=True, is_user_input=False, traverse_string=False):
4974     ''' Traverse nested list/dict/tuple
4975     @param path_list        A list of paths which are checked one by one.
4976                             Each path is a list of keys where each key is a string,
4977                             a function, a tuple of strings/None or "...".
4978                             When a fuction is given, it takes the key and value as arguments
4979                             and returns whether the key matches or not. When a tuple is given,
4980                             all the keys given in the tuple are traversed, and
4981                             "..." traverses all the keys in the object
4982                             "None" returns the object without traversal
4983     @param default          Default value to return
4984     @param expected_type    Only accept final value of this type (Can also be any callable)
4985     @param get_all          Return all the values obtained from a path or only the first one
4986     @param casesense        Whether to consider dictionary keys as case sensitive
4987     @param is_user_input    Whether the keys are generated from user input. If True,
4988                             strings are converted to int/slice if necessary
4989     @param traverse_string  Whether to traverse inside strings. If True, any
4990                             non-compatible object will also be converted into a string
4991     # TODO: Write tests
4992     '''
4993     if not casesense:
4994         _lower = lambda k: (k.lower() if isinstance(k, str) else k)
4995         path_list = (map(_lower, variadic(path)) for path in path_list)
4996
4997     def _traverse_obj(obj, path, _current_depth=0):
4998         nonlocal depth
4999         path = tuple(variadic(path))
5000         for i, key in enumerate(path):
5001             if None in (key, obj):
5002                 return obj
5003             if isinstance(key, (list, tuple)):
5004                 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5005                 key = ...
5006             if key is ...:
5007                 obj = (obj.values() if isinstance(obj, dict)
5008                        else obj if isinstance(obj, (list, tuple, LazyList))
5009                        else str(obj) if traverse_string else [])
5010                 _current_depth += 1
5011                 depth = max(depth, _current_depth)
5012                 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5013             elif callable(key):
5014                 if isinstance(obj, (list, tuple, LazyList)):
5015                     obj = enumerate(obj)
5016                 elif isinstance(obj, dict):
5017                     obj = obj.items()
5018                 else:
5019                     if not traverse_string:
5020                         return None
5021                     obj = str(obj)
5022                 _current_depth += 1
5023                 depth = max(depth, _current_depth)
5024                 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5025             elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5026                 obj = (obj.get(key) if casesense or (key in obj)
5027                        else next((v for k, v in obj.items() if _lower(k) == key), None))
5028             else:
5029                 if is_user_input:
5030                     key = (int_or_none(key) if ':' not in key
5031                            else slice(*map(int_or_none, key.split(':'))))
5032                     if key == slice(None):
5033                         return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5034                 if not isinstance(key, (int, slice)):
5035                     return None
5036                 if not isinstance(obj, (list, tuple, LazyList)):
5037                     if not traverse_string:
5038                         return None
5039                     obj = str(obj)
5040                 try:
5041                     obj = obj[key]
5042                 except IndexError:
5043                     return None
5044         return obj
5045
5046     if isinstance(expected_type, type):
5047         type_test = lambda val: val if isinstance(val, expected_type) else None
5048     elif expected_type is not None:
5049         type_test = expected_type
5050     else:
5051         type_test = lambda val: val
5052
5053     for path in path_list:
5054         depth = 0
5055         val = _traverse_obj(obj, path)
5056         if val is not None:
5057             if depth:
5058                 for _ in range(depth - 1):
5059                     val = itertools.chain.from_iterable(v for v in val if v is not None)
5060                 val = [v for v in map(type_test, val) if v is not None]
5061                 if val:
5062                     return val if get_all else val[0]
5063             else:
5064                 val = type_test(val)
5065                 if val is not None:
5066                     return val
5067     return default
5068
5069
5070 def traverse_dict(dictn, keys, casesense=True):
5071     write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5072                  'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5073     return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5074
5075
5076 def get_first(obj, keys, **kwargs):
5077     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5078
5079
5080 def variadic(x, allowed_types=(str, bytes, dict)):
5081     return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5082
5083
5084 def decode_base(value, digits):
5085     # This will convert given base-x string to scalar (long or int)
5086     table = {char: index for index, char in enumerate(digits)}
5087     result = 0
5088     base = len(digits)
5089     for chr in value:
5090         result *= base
5091         result += table[chr]
5092     return result
5093
5094
5095 def time_seconds(**kwargs):
5096     t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5097     return t.timestamp()
5098
5099
5100 # create a JSON Web Signature (jws) with HS256 algorithm
5101 # the resulting format is in JWS Compact Serialization
5102 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5103 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5104 def jwt_encode_hs256(payload_data, key, headers={}):
5105     header_data = {
5106         'alg': 'HS256',
5107         'typ': 'JWT',
5108     }
5109     if headers:
5110         header_data.update(headers)
5111     header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
5112     payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
5113     h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
5114     signature_b64 = base64.b64encode(h.digest())
5115     token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5116     return token
5117
5118
5119 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5120 def jwt_decode_hs256(jwt):
5121     header_b64, payload_b64, signature_b64 = jwt.split('.')
5122     payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5123     return payload_data
5124
5125
5126 def supports_terminal_sequences(stream):
5127     if compat_os_name == 'nt':
5128         from .compat import WINDOWS_VT_MODE  # Must be imported locally
5129         if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
5130             return False
5131     elif not os.getenv('TERM'):
5132         return False
5133     try:
5134         return stream.isatty()
5135     except BaseException:
5136         return False
5137
5138
5139 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5140
5141
5142 def remove_terminal_sequences(string):
5143     return _terminal_sequences_re.sub('', string)
5144
5145
5146 def number_of_digits(number):
5147     return len('%d' % number)
5148
5149
5150 def join_nonempty(*values, delim='-', from_dict=None):
5151     if from_dict is not None:
5152         values = map(from_dict.get, values)
5153     return delim.join(map(str, filter(None, values)))
5154
5155
5156 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5157     """
5158     Find the largest format dimensions in terms of video width and, for each thumbnail:
5159     * Modify the URL: Match the width with the provided regex and replace with the former width
5160     * Update dimensions
5161
5162     This function is useful with video services that scale the provided thumbnails on demand
5163     """
5164     _keys = ('width', 'height')
5165     max_dimensions = max(
5166         [tuple(format.get(k) or 0 for k in _keys) for format in formats],
5167         default=(0, 0))
5168     if not max_dimensions[0]:
5169         return thumbnails
5170     return [
5171         merge_dicts(
5172             {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5173             dict(zip(_keys, max_dimensions)), thumbnail)
5174         for thumbnail in thumbnails
5175     ]
5176
5177
5178 def parse_http_range(range):
5179     """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5180     if not range:
5181         return None, None, None
5182     crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5183     if not crg:
5184         return None, None, None
5185     return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5186
5187
5188 class Config:
5189     own_args = None
5190     filename = None
5191     __initialized = False
5192
5193     def __init__(self, parser, label=None):
5194         self._parser, self.label = parser, label
5195         self._loaded_paths, self.configs = set(), []
5196
5197     def init(self, args=None, filename=None):
5198         assert not self.__initialized
5199         directory = ''
5200         if filename:
5201             location = os.path.realpath(filename)
5202             directory = os.path.dirname(location)
5203             if location in self._loaded_paths:
5204                 return False
5205             self._loaded_paths.add(location)
5206
5207         self.__initialized = True
5208         self.own_args, self.filename = args, filename
5209         for location in self._parser.parse_args(args)[0].config_locations or []:
5210             location = os.path.join(directory, expand_path(location))
5211             if os.path.isdir(location):
5212                 location = os.path.join(location, 'yt-dlp.conf')
5213             if not os.path.exists(location):
5214                 self._parser.error(f'config location {location} does not exist')
5215             self.append_config(self.read_file(location), location)
5216         return True
5217
5218     def __str__(self):
5219         label = join_nonempty(
5220             self.label, 'config', f'"{self.filename}"' if self.filename else '',
5221             delim=' ')
5222         return join_nonempty(
5223             self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5224             *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5225             delim='\n')
5226
5227     @staticmethod
5228     def read_file(filename, default=[]):
5229         try:
5230             optionf = open(filename)
5231         except IOError:
5232             return default  # silently skip if file is not present
5233         try:
5234             # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5235             contents = optionf.read()
5236             res = compat_shlex_split(contents, comments=True)
5237         finally:
5238             optionf.close()
5239         return res
5240
5241     @staticmethod
5242     def hide_login_info(opts):
5243         PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
5244         eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5245
5246         def _scrub_eq(o):
5247             m = eqre.match(o)
5248             if m:
5249                 return m.group('key') + '=PRIVATE'
5250             else:
5251                 return o
5252
5253         opts = list(map(_scrub_eq, opts))
5254         for idx, opt in enumerate(opts):
5255             if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5256                 opts[idx + 1] = 'PRIVATE'
5257         return opts
5258
5259     def append_config(self, *args, label=None):
5260         config = type(self)(self._parser, label)
5261         config._loaded_paths = self._loaded_paths
5262         if config.init(*args):
5263             self.configs.append(config)
5264
5265     @property
5266     def all_args(self):
5267         for config in reversed(self.configs):
5268             yield from config.all_args
5269         yield from self.own_args or []
5270
5271     def parse_args(self):
5272         return self._parser.parse_args(list(self.all_args))
5273
5274
5275 class WebSocketsWrapper():
5276     """Wraps websockets module to use in non-async scopes"""
5277
5278     def __init__(self, url, headers=None, connect=True):
5279         self.loop = asyncio.events.new_event_loop()
5280         self.conn = compat_websockets.connect(
5281             url, extra_headers=headers, ping_interval=None,
5282             close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5283         if connect:
5284             self.__enter__()
5285         atexit.register(self.__exit__, None, None, None)
5286
5287     def __enter__(self):
5288         if not self.pool:
5289             self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5290         return self
5291
5292     def send(self, *args):
5293         self.run_with_loop(self.pool.send(*args), self.loop)
5294
5295     def recv(self, *args):
5296         return self.run_with_loop(self.pool.recv(*args), self.loop)
5297
5298     def __exit__(self, type, value, traceback):
5299         try:
5300             return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5301         finally:
5302             self.loop.close()
5303             self._cancel_all_tasks(self.loop)
5304
5305     # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5306     # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
5307     @staticmethod
5308     def run_with_loop(main, loop):
5309         if not asyncio.coroutines.iscoroutine(main):
5310             raise ValueError(f'a coroutine was expected, got {main!r}')
5311
5312         try:
5313             return loop.run_until_complete(main)
5314         finally:
5315             loop.run_until_complete(loop.shutdown_asyncgens())
5316             if hasattr(loop, 'shutdown_default_executor'):
5317                 loop.run_until_complete(loop.shutdown_default_executor())
5318
5319     @staticmethod
5320     def _cancel_all_tasks(loop):
5321         to_cancel = asyncio.tasks.all_tasks(loop)
5322
5323         if not to_cancel:
5324             return
5325
5326         for task in to_cancel:
5327             task.cancel()
5328
5329         loop.run_until_complete(
5330             asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True))
5331
5332         for task in to_cancel:
5333             if task.cancelled():
5334                 continue
5335             if task.exception() is not None:
5336                 loop.call_exception_handler({
5337                     'message': 'unhandled exception during asyncio.run() shutdown',
5338                     'exception': task.exception(),
5339                     'task': task,
5340                 })
5341
5342
5343 has_websockets = bool(compat_websockets)
5344
5345
5346 def merge_headers(*dicts):
5347     """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5348     return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5349
5350
5351 class classproperty:
5352     def __init__(self, f):
5353         self.f = f
5354
5355     def __get__(self, _, cls):
5356         return self.f(cls)