#!/usr/bin/env python3
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import types
import urllib.parse
import xml.etree.ElementTree
import zlib

from .compat import asyncio, functools  # isort: split
from .compat import (
    compat_chr,
    compat_cookiejar,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_http_client,
    compat_HTTPError,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
)
from .dependencies import brotli, certifi, websockets
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

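# Usage sketch (editorial illustration; the namespace URI is a made-up example):
#   >>> xpath_with_ns('media:song/media:url', {'media': 'http://example.com/ns'})
#   '{http://example.com/ns}song/{http://example.com/ns}url'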

def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(compat_HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        compat_HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a dictionary of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()

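# Usage sketch (editorial illustration of the substitutions performed above):
#   >>> clean_html('<p>foo</p><p>bar &amp; baz</p>')
#   'foo\nbar & baz'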

class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        if self.ignore_extra:
            return self.raw_decode(s.lstrip())[0]
        return super().decode(s)


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp

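# Usage sketch (editorial illustration; any RFC 2822 date string works):
#   >>> timeconvert('Wed, 14 May 2014 10:00:00 +0000')
#   1400061600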

def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result

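# Usage sketches (editorial illustration; expected values follow the rules above):
#   >>> sanitize_filename('New World record at 0:12:34')
#   'New World record at 0_12_34'
#   >>> sanitize_filename('aäb中国的c', restricted=True)
#   'aab_c'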

def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url

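# Usage sketches (editorial illustration):
#   >>> sanitize_url('//example.com/watch')
#   'http://example.com/watch'
#   >>> sanitize_url('httpss://example.com')
#   'https://example.com'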

def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'

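# Usage sketch (editorial illustration; the credentials are dummy values):
#   >>> extract_basic_auth('http://user:pass@example.com/x')
#   ('http://example.com/x', 'Basic dXNlcjpwYXNz')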

def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

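# Usage sketch (editorial illustration):
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]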

def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)

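# Usage sketches (editorial illustration):
#   >>> unescapeHTML('&amp;')
#   '&'
#   >>> unescapeHTML('&#x2F;')
#   '/'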

def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, text=False, **kwargs):
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, **kwargs):
        with cls(*args, **kwargs) as proc:
            stdout, stderr = proc.communicate_or_kill()
            return stdout or '', stderr or '', proc.returncode


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)

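# Usage sketch (editorial illustration):
#   >>> timetuple_from_msec(345123)
#   Time(hours=0, minutes=5, seconds=45, milliseconds=123)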

def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret

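# Usage sketches (editorial illustration):
#   >>> formatSeconds(3661)
#   '1:01:01'
#   >>> formatSeconds(61, msec=True)
#   '1:01.000'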

def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
           'filling out the appropriate issue template. '
           'Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'

class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise

class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise compat_cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

1599
1600 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
1601 def __init__(self, cookiejar=None):
1602 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
1603
1604 def http_response(self, request, response):
1605 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
1606
1607 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
1608 https_response = http_response
1609
1610
1611 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
1612 """YoutubeDL redirect handler
1613
1614 The code is based on HTTPRedirectHandler implementation from CPython [1].
1615
1616 This redirect handler solves two issues:
1617 - ensures redirect URL is always unicode under python 2
1618 - introduces support for experimental HTTP response status code
1619 308 Permanent Redirect [2] used by some sites [3]
1620
1621 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1622 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1623 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1624 """
1625
1626 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
1627
1628 def redirect_request(self, req, fp, code, msg, headers, newurl):
1629 """Return a Request or None in response to a redirect.
1630
1631 This is called by the http_error_30x methods when a
1632 redirection response is received. If a redirection should
1633 take place, return a new Request to allow http_error_30x to
1634 perform the redirect. Otherwise, raise HTTPError if no-one
1635 else should try to handle this url. Return None if you can't
1636 but another Handler might.
1637 """
1638 m = req.get_method()
1639 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1640 or code in (301, 302, 303) and m == "POST")):
1641 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
1642 # Strictly (according to RFC 2616), 301 or 302 in response to
1643 # a POST MUST NOT cause a redirection without confirmation
1644 # from the user (of urllib.request, in this case). In practice,
1645 # essentially all clients do redirect in this case, so we do
1646 # the same.
1647
1648 # Be conciliant with URIs containing a space. This is mainly
1649 # redundant with the more complete encoding done in http_error_302(),
1650 # but it is kept for compatibility with other callers.
1651 newurl = newurl.replace(' ', '%20')
1652
1653 CONTENT_HEADERS = ("content-length", "content-type")
1654 # NB: don't use dict comprehension for python 2.6 compatibility
1655 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1656
1657 # A 303 must either use GET or HEAD for subsequent request
1658 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1659 if code == 303 and m != 'HEAD':
1660 m = 'GET'
1661 # 301 and 302 redirects are commonly turned into a GET from a POST
1662 # for subsequent requests by browsers, so we'll do the same.
1663 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1664 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1665 if code in (301, 302) and m == 'POST':
1666 m = 'GET'
1667
1668 return compat_urllib_request.Request(
1669 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1670 unverifiable=True, method=m)
1671
1672
1673 def extract_timezone(date_str):
1674 m = re.search(
1675 r'''(?x)
1676 ^.{8,}? # >=8 char non-TZ prefix, if present
1677 (?P<tz>Z| # just the UTC Z, or
1678 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1679 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1680 [ ]? # optional space
1681 (?P<sign>\+|-) # +/-
1682 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1683 $)
1684 ''', date_str)
1685 if not m:
1686 timezone = datetime.timedelta()
1687 else:
1688 date_str = date_str[:-len(m.group('tz'))]
1689 if not m.group('sign'):
1690 timezone = datetime.timedelta()
1691 else:
1692 sign = 1 if m.group('sign') == '+' else -1
1693 timezone = datetime.timedelta(
1694 hours=sign * int(m.group('hours')),
1695 minutes=sign * int(m.group('minutes')))
1696 return timezone, date_str
1697
1698
1699 def parse_iso8601(date_str, delimiter='T', timezone=None):
1700 """ Return a UNIX timestamp from the given date """
1701
1702 if date_str is None:
1703 return None
1704
1705 date_str = re.sub(r'\.[0-9]+', '', date_str)
1706
1707 if timezone is None:
1708 timezone, date_str = extract_timezone(date_str)
1709
1710 with contextlib.suppress(ValueError):
1711 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1712 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1713 return calendar.timegm(dt.timetuple())
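
# Example (illustrative): the extracted offset is applied before conversion
# to a UNIX timestamp
#   parse_iso8601('2014-05-05T12:00:00+02:00')  # -> 1399284000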
1714
1715
1716 def date_formats(day_first=True):
1717 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1718
1719
1720 def unified_strdate(date_str, day_first=True):
1721 """Return a string with the date in the format YYYYMMDD"""
1722
1723 if date_str is None:
1724 return None
1725 upload_date = None
1726 # Replace commas
1727 date_str = date_str.replace(',', ' ')
1728 # Remove AM/PM + timezone
1729 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1730 _, date_str = extract_timezone(date_str)
1731
1732 for expression in date_formats(day_first):
1733 with contextlib.suppress(ValueError):
1734 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1735 if upload_date is None:
1736 timetuple = email.utils.parsedate_tz(date_str)
1737 if timetuple:
1738 with contextlib.suppress(ValueError):
1739 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1740 if upload_date is not None:
1741 return compat_str(upload_date)
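
# Example (illustrative; assumes the DATE_FORMATS tables defined earlier in
# this module include '%B %d %Y'):
#   unified_strdate('December 21, 2010')  # -> '20101221'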
1742
1743
1744 def unified_timestamp(date_str, day_first=True):
1745 if date_str is None:
1746 return None
1747
1748 date_str = re.sub(r'[,|]', '', date_str)
1749
1750 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1751 timezone, date_str = extract_timezone(date_str)
1752
1753 # Remove AM/PM + timezone
1754 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1755
1756 # Remove unrecognized timezones from ISO 8601 alike timestamps
1757 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1758 if m:
1759 date_str = date_str[:-len(m.group('tz'))]
1760
1761 # Python only supports microseconds, so remove nanoseconds
1762 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1763 if m:
1764 date_str = m.group(1)
1765
1766 for expression in date_formats(day_first):
1767 with contextlib.suppress(ValueError):
1768 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1769 return calendar.timegm(dt.timetuple())
1770 timetuple = email.utils.parsedate_tz(date_str)
1771 if timetuple:
1772 return calendar.timegm(timetuple) + pm_delta * 3600
1773
1774
1775 def determine_ext(url, default_ext='unknown_video'):
1776 if url is None or '.' not in url:
1777 return default_ext
1778 guess = url.partition('?')[0].rpartition('.')[2]
1779 if re.match(r'^[A-Za-z0-9]+$', guess):
1780 return guess
1781 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1782 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1783 return guess.rstrip('/')
1784 else:
1785 return default_ext
1786
1787
1788 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1789 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1790
1791
1792 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1793 R"""
1794 Return a datetime object from a string.
1795 Supported format:
1796 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1797
1798 @param format strftime format of DATE
1799 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1800 auto: round to the unit provided in date_str (if applicable).
1801 """
1802 auto_precision = False
1803 if precision == 'auto':
1804 auto_precision = True
1805 precision = 'microsecond'
1806 today = datetime_round(datetime.datetime.utcnow(), precision)
1807 if date_str in ('now', 'today'):
1808 return today
1809 if date_str == 'yesterday':
1810 return today - datetime.timedelta(days=1)
1811 match = re.match(
1812 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1813 date_str)
1814 if match is not None:
1815 start_time = datetime_from_str(match.group('start'), precision, format)
1816 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1817 unit = match.group('unit')
1818 if unit == 'month' or unit == 'year':
1819 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1820 unit = 'day'
1821 else:
1822 if unit == 'week':
1823 unit = 'day'
1824 time *= 7
1825 delta = datetime.timedelta(**{unit + 's': time})
1826 new_date = start_time + delta
1827 if auto_precision:
1828 return datetime_round(new_date, unit)
1829 return new_date
1830
1831 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
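
# Example (illustrative): a relative offset is resolved against the parsed date
#   datetime_from_str('20200229-1month', precision='day')
#   -> datetime.datetime(2020, 1, 29, 0, 0)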
1832
1833
1834 def date_from_str(date_str, format='%Y%m%d', strict=False):
1835 R"""
1836 Return a date object from a string using datetime_from_str
1837
1838 @param strict Restrict allowed patterns to "YYYYMMDD" and
1839 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1840 """
1841 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1842 raise ValueError(f'Invalid date format "{date_str}"')
1843 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1844
1845
1846 def datetime_add_months(dt, months):
1847 """Increment/Decrement a datetime object by months."""
1848 month = dt.month + months - 1
1849 year = dt.year + month // 12
1850 month = month % 12 + 1
1851 day = min(dt.day, calendar.monthrange(year, month)[1])
1852 return dt.replace(year, month, day)
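
# Example (illustrative): the day is clamped to the length of the target month
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   -> datetime.datetime(2020, 2, 29, 0, 0)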
1853
1854
1855 def datetime_round(dt, precision='day'):
1856 """
1857 Round a datetime object's time to a specific precision
1858 """
1859 if precision == 'microsecond':
1860 return dt
1861
1862 unit_seconds = {
1863 'day': 86400,
1864 'hour': 3600,
1865 'minute': 60,
1866 'second': 1,
1867 }
1868 roundto = lambda x, n: ((x + n / 2) // n) * n
1869 timestamp = calendar.timegm(dt.timetuple())
1870 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1871
1872
1873 def hyphenate_date(date_str):
1874 """
1875 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1876 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1877 if match is not None:
1878 return '-'.join(match.groups())
1879 else:
1880 return date_str
1881
1882
1883 class DateRange:
1884 """Represents a time interval between two dates"""
1885
1886 def __init__(self, start=None, end=None):
1887 """start and end must be strings in the format accepted by date"""
1888 if start is not None:
1889 self.start = date_from_str(start, strict=True)
1890 else:
1891 self.start = datetime.datetime.min.date()
1892 if end is not None:
1893 self.end = date_from_str(end, strict=True)
1894 else:
1895 self.end = datetime.datetime.max.date()
1896 if self.start > self.end:
1897 raise ValueError('Date range: "%s"; the start date must be before the end date' % self)
1898
1899 @classmethod
1900 def day(cls, day):
1901 """Returns a range that only contains the given day"""
1902 return cls(day, day)
1903
1904 def __contains__(self, date):
1905 """Check if the date is in the range"""
1906 if not isinstance(date, datetime.date):
1907 date = date_from_str(date)
1908 return self.start <= date <= self.end
1909
1910 def __str__(self):
1911 return f'{self.start.isoformat()} - {self.end.isoformat()}'
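
# Example (illustrative): membership accepts date objects or date strings
#   '20220315' in DateRange('20220301', '20220331')  # -> True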
1912
1913
1914 def platform_name():
1915 """ Returns the platform name as a compat_str """
1916 res = platform.platform()
1917 if isinstance(res, bytes):
1918 res = res.decode(preferredencoding())
1919
1920 assert isinstance(res, compat_str)
1921 return res
1922
1923
1924 @functools.cache
1925 def get_windows_version():
1926 ''' Get Windows version. Returns () if it's not running on Windows '''
1927 if compat_os_name == 'nt':
1928 return version_tuple(platform.win32_ver()[1])
1929 else:
1930 return ()
1931
1932
1933 def write_string(s, out=None, encoding=None):
1934 assert isinstance(s, str)
1935 out = out or sys.stderr
1936
1937 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1938 s = re.sub(r'([\r\n]+)', r' \1', s)
1939
1940 enc, buffer = None, out
1941 if 'b' in getattr(out, 'mode', ''):
1942 enc = encoding or preferredencoding()
1943 elif hasattr(out, 'buffer'):
1944 buffer = out.buffer
1945 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1946
1947 buffer.write(s.encode(enc, 'ignore') if enc else s)
1948 out.flush()
1949
1950
1951 def bytes_to_intlist(bs):
1952 if not bs:
1953 return []
1954 if isinstance(bs[0], int): # Python 3
1955 return list(bs)
1956 else:
1957 return [ord(c) for c in bs]
1958
1959
1960 def intlist_to_bytes(xs):
1961 if not xs:
1962 return b''
1963 return compat_struct_pack('%dB' % len(xs), *xs)
1964
1965
1966 class LockingUnsupportedError(OSError):
1967 msg = 'File locking is not supported'
1968
1969 def __init__(self):
1970 super().__init__(self.msg)
1971
1972
1973 # Cross-platform file locking
1974 if sys.platform == 'win32':
1975 import ctypes.wintypes
1976 import msvcrt
1977
1978 class OVERLAPPED(ctypes.Structure):
1979 _fields_ = [
1980 ('Internal', ctypes.wintypes.LPVOID),
1981 ('InternalHigh', ctypes.wintypes.LPVOID),
1982 ('Offset', ctypes.wintypes.DWORD),
1983 ('OffsetHigh', ctypes.wintypes.DWORD),
1984 ('hEvent', ctypes.wintypes.HANDLE),
1985 ]
1986
1987 kernel32 = ctypes.windll.kernel32
1988 LockFileEx = kernel32.LockFileEx
1989 LockFileEx.argtypes = [
1990 ctypes.wintypes.HANDLE, # hFile
1991 ctypes.wintypes.DWORD, # dwFlags
1992 ctypes.wintypes.DWORD, # dwReserved
1993 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1994 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1995 ctypes.POINTER(OVERLAPPED) # Overlapped
1996 ]
1997 LockFileEx.restype = ctypes.wintypes.BOOL
1998 UnlockFileEx = kernel32.UnlockFileEx
1999 UnlockFileEx.argtypes = [
2000 ctypes.wintypes.HANDLE, # hFile
2001 ctypes.wintypes.DWORD, # dwReserved
2002 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2003 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2004 ctypes.POINTER(OVERLAPPED) # Overlapped
2005 ]
2006 UnlockFileEx.restype = ctypes.wintypes.BOOL
2007 whole_low = 0xffffffff
2008 whole_high = 0x7fffffff
2009
2010 def _lock_file(f, exclusive, block):
2011 overlapped = OVERLAPPED()
2012 overlapped.Offset = 0
2013 overlapped.OffsetHigh = 0
2014 overlapped.hEvent = 0
2015 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2016
2017 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2018 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2019 0, whole_low, whole_high, f._lock_file_overlapped_p):
2020 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2021 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2022
2023 def _unlock_file(f):
2024 assert f._lock_file_overlapped_p
2025 handle = msvcrt.get_osfhandle(f.fileno())
2026 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2027 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2028
2029 else:
2030 try:
2031 import fcntl
2032
2033 def _lock_file(f, exclusive, block):
2034 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2035 if not block:
2036 flags |= fcntl.LOCK_NB
2037 try:
2038 fcntl.flock(f, flags)
2039 except BlockingIOError:
2040 raise
2041 except OSError: # AOSP does not have flock()
2042 fcntl.lockf(f, flags)
2043
2044 def _unlock_file(f):
2045 try:
2046 fcntl.flock(f, fcntl.LOCK_UN)
2047 except OSError:
2048 fcntl.lockf(f, fcntl.LOCK_UN)
2049
2050 except ImportError:
2051
2052 def _lock_file(f, exclusive, block):
2053 raise LockingUnsupportedError()
2054
2055 def _unlock_file(f):
2056 raise LockingUnsupportedError()
2057
2058
2059 class locked_file:
2060 locked = False
2061
2062 def __init__(self, filename, mode, block=True, encoding=None):
2063 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2064 raise NotImplementedError(mode)
2065 self.mode, self.block = mode, block
2066
2067 writable = any(f in mode for f in 'wax+')
2068 readable = any(f in mode for f in 'r+')
2069 flags = functools.reduce(operator.ior, (
2070 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2071 getattr(os, 'O_BINARY', 0), # Windows only
2072 getattr(os, 'O_NOINHERIT', 0), # Windows only
2073 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2074 os.O_APPEND if 'a' in mode else 0,
2075 os.O_EXCL if 'x' in mode else 0,
2076 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2077 ))
2078
2079 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2080
2081 def __enter__(self):
2082 exclusive = 'r' not in self.mode
2083 try:
2084 _lock_file(self.f, exclusive, self.block)
2085 self.locked = True
2086 except OSError:
2087 self.f.close()
2088 raise
2089 if 'w' in self.mode:
2090 try:
2091 self.f.truncate()
2092 except OSError as e:
2093 if e.errno not in (
2094 errno.ESPIPE, # Illegal seek - expected for FIFO
2095 errno.EINVAL, # Invalid argument - expected for /dev/null
2096 ):
2097 raise
2098 return self
2099
2100 def unlock(self):
2101 if not self.locked:
2102 return
2103 try:
2104 _unlock_file(self.f)
2105 finally:
2106 self.locked = False
2107
2108 def __exit__(self, *_):
2109 try:
2110 self.unlock()
2111 finally:
2112 self.f.close()
2113
2114 open = __enter__
2115 close = __exit__
2116
2117 def __getattr__(self, attr):
2118 return getattr(self.f, attr)
2119
2120 def __iter__(self):
2121 return iter(self.f)
2122
2123
2124 @functools.cache
2125 def get_filesystem_encoding():
2126 encoding = sys.getfilesystemencoding()
2127 return encoding if encoding is not None else 'utf-8'
2128
2129
2130 def shell_quote(args):
2131 quoted_args = []
2132 encoding = get_filesystem_encoding()
2133 for a in args:
2134 if isinstance(a, bytes):
2135 # We may get a filename encoded with 'encodeFilename'
2136 a = a.decode(encoding)
2137 quoted_args.append(compat_shlex_quote(a))
2138 return ' '.join(quoted_args)
2139
2140
2141 def smuggle_url(url, data):
2142 """ Pass additional data in a URL for internal use. """
2143
2144 url, idata = unsmuggle_url(url, {})
2145 data.update(idata)
2146 sdata = compat_urllib_parse_urlencode(
2147 {'__youtubedl_smuggle': json.dumps(data)})
2148 return url + '#' + sdata
2149
2150
2151 def unsmuggle_url(smug_url, default=None):
2152 if '#__youtubedl_smuggle' not in smug_url:
2153 return smug_url, default
2154 url, _, sdata = smug_url.rpartition('#')
2155 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2156 data = json.loads(jsond)
2157 return url, data
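
# Example (illustrative): smuggle_url/unsmuggle_url round-trip data through
# the URL fragment
#   unsmuggle_url(smuggle_url('https://example.com/video', {'referer': 'https://example.com'}))
#   -> ('https://example.com/video', {'referer': 'https://example.com'})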
2158
2159
2160 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2161 """ Formats numbers with decimal sufixes like K, M, etc """
2162 num, factor = float_or_none(num), float(factor)
2163 if num is None or num < 0:
2164 return None
2165 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2166 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2167 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2168 if factor == 1024:
2169 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2170 converted = num / (factor ** exponent)
2171 return fmt % (converted, suffix)
2172
2173
2174 def format_bytes(bytes):
2175 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
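
# Examples (illustrative): binary units (factor=1024) gain an 'i' infix
#   format_decimal_suffix(12345)  # -> '12k'
#   format_bytes(1500)            # -> '1.46KiB'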
2176
2177
2178 def lookup_unit_table(unit_table, s):
2179 units_re = '|'.join(re.escape(u) for u in unit_table)
2180 m = re.match(
2181 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2182 if not m:
2183 return None
2184 num_str = m.group('num').replace(',', '.')
2185 mult = unit_table[m.group('unit')]
2186 return int(float(num_str) * mult)
2187
2188
2189 def parse_filesize(s):
2190 if s is None:
2191 return None
2192
2193 # The lower-case forms are of course incorrect and unofficial,
2194 # but we support those too
2195 _UNIT_TABLE = {
2196 'B': 1,
2197 'b': 1,
2198 'bytes': 1,
2199 'KiB': 1024,
2200 'KB': 1000,
2201 'kB': 1024,
2202 'Kb': 1000,
2203 'kb': 1000,
2204 'kilobytes': 1000,
2205 'kibibytes': 1024,
2206 'MiB': 1024 ** 2,
2207 'MB': 1000 ** 2,
2208 'mB': 1024 ** 2,
2209 'Mb': 1000 ** 2,
2210 'mb': 1000 ** 2,
2211 'megabytes': 1000 ** 2,
2212 'mebibytes': 1024 ** 2,
2213 'GiB': 1024 ** 3,
2214 'GB': 1000 ** 3,
2215 'gB': 1024 ** 3,
2216 'Gb': 1000 ** 3,
2217 'gb': 1000 ** 3,
2218 'gigabytes': 1000 ** 3,
2219 'gibibytes': 1024 ** 3,
2220 'TiB': 1024 ** 4,
2221 'TB': 1000 ** 4,
2222 'tB': 1024 ** 4,
2223 'Tb': 1000 ** 4,
2224 'tb': 1000 ** 4,
2225 'terabytes': 1000 ** 4,
2226 'tebibytes': 1024 ** 4,
2227 'PiB': 1024 ** 5,
2228 'PB': 1000 ** 5,
2229 'pB': 1024 ** 5,
2230 'Pb': 1000 ** 5,
2231 'pb': 1000 ** 5,
2232 'petabytes': 1000 ** 5,
2233 'pebibytes': 1024 ** 5,
2234 'EiB': 1024 ** 6,
2235 'EB': 1000 ** 6,
2236 'eB': 1024 ** 6,
2237 'Eb': 1000 ** 6,
2238 'eb': 1000 ** 6,
2239 'exabytes': 1000 ** 6,
2240 'exbibytes': 1024 ** 6,
2241 'ZiB': 1024 ** 7,
2242 'ZB': 1000 ** 7,
2243 'zB': 1024 ** 7,
2244 'Zb': 1000 ** 7,
2245 'zb': 1000 ** 7,
2246 'zettabytes': 1000 ** 7,
2247 'zebibytes': 1024 ** 7,
2248 'YiB': 1024 ** 8,
2249 'YB': 1000 ** 8,
2250 'yB': 1024 ** 8,
2251 'Yb': 1000 ** 8,
2252 'yb': 1000 ** 8,
2253 'yottabytes': 1000 ** 8,
2254 'yobibytes': 1024 ** 8,
2255 }
2256
2257 return lookup_unit_table(_UNIT_TABLE, s)
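
# Examples (illustrative): decimal vs binary units, ',' as decimal separator
#   parse_filesize('1,5MB')  # -> 1500000
#   parse_filesize('5 GiB')  # -> 5368709120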
2258
2259
2260 def parse_count(s):
2261 if s is None:
2262 return None
2263
2264 s = re.sub(r'^[^\d]+\s', '', s).strip()
2265
2266 if re.match(r'^[\d,.]+$', s):
2267 return str_to_int(s)
2268
2269 _UNIT_TABLE = {
2270 'k': 1000,
2271 'K': 1000,
2272 'm': 1000 ** 2,
2273 'M': 1000 ** 2,
2274 'kk': 1000 ** 2,
2275 'KK': 1000 ** 2,
2276 'b': 1000 ** 3,
2277 'B': 1000 ** 3,
2278 }
2279
2280 ret = lookup_unit_table(_UNIT_TABLE, s)
2281 if ret is not None:
2282 return ret
2283
2284 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2285 if mobj:
2286 return str_to_int(mobj.group(1))
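
# Examples (illustrative):
#   parse_count('1.1k')         # -> 1100
#   parse_count('1,234 views')  # -> 1234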
2287
2288
2289 def parse_resolution(s, *, lenient=False):
2290 if s is None:
2291 return {}
2292
2293 if lenient:
2294 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2295 else:
2296 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2297 if mobj:
2298 return {
2299 'width': int(mobj.group('w')),
2300 'height': int(mobj.group('h')),
2301 }
2302
2303 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2304 if mobj:
2305 return {'height': int(mobj.group(1))}
2306
2307 mobj = re.search(r'\b([48])[kK]\b', s)
2308 if mobj:
2309 return {'height': int(mobj.group(1)) * 540}
2310
2311 return {}
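
# Examples (illustrative):
#   parse_resolution('1920x1080')  # -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       # -> {'height': 720}
#   parse_resolution('4k')         # -> {'height': 2160}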
2312
2313
2314 def parse_bitrate(s):
2315 if not isinstance(s, compat_str):
2316 return
2317 mobj = re.search(r'\b(\d+)\s*kbps', s)
2318 if mobj:
2319 return int(mobj.group(1))
2320
2321
2322 def month_by_name(name, lang='en'):
2323 """ Return the number of a month by (locale-independently) English name """
2324
2325 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2326
2327 try:
2328 return month_names.index(name) + 1
2329 except ValueError:
2330 return None
2331
2332
2333 def month_by_abbreviation(abbrev):
2334 """ Return the number of a month by (locale-independently) English
2335 abbreviations """
2336
2337 try:
2338 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2339 except ValueError:
2340 return None
2341
2342
2343 def fix_xml_ampersands(xml_str):
2344 """Replace all the '&' by '&amp;' in XML"""
2345 return re.sub(
2346 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2347 '&amp;',
2348 xml_str)
2349
2350
2351 def setproctitle(title):
2352 assert isinstance(title, compat_str)
2353
2354 # ctypes in Jython is not complete
2355 # http://bugs.jython.org/issue2148
2356 if sys.platform.startswith('java'):
2357 return
2358
2359 try:
2360 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2361 except OSError:
2362 return
2363 except TypeError:
2364 # LoadLibrary in Windows Python 2.7.13 only expects
2365 # a bytestring, but since unicode_literals turns
2366 # every string into a unicode string, it fails.
2367 return
2368 title_bytes = title.encode()
2369 buf = ctypes.create_string_buffer(len(title_bytes))
2370 buf.value = title_bytes
2371 try:
2372 libc.prctl(15, buf, 0, 0, 0)
2373 except AttributeError:
2374 return # Strange libc, just skip this
2375
2376
2377 def remove_start(s, start):
2378 return s[len(start):] if s is not None and s.startswith(start) else s
2379
2380
2381 def remove_end(s, end):
2382 return s[:-len(end)] if s is not None and s.endswith(end) else s
2383
2384
2385 def remove_quotes(s):
2386 if s is None or len(s) < 2:
2387 return s
2388 for quote in ('"', "'", ):
2389 if s[0] == quote and s[-1] == quote:
2390 return s[1:-1]
2391 return s
2392
2393
2394 def get_domain(url):
2395 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2396 return domain.group('domain') if domain else None
2397
2398
2399 def url_basename(url):
2400 path = compat_urlparse.urlparse(url).path
2401 return path.strip('/').split('/')[-1]
2402
2403
2404 def base_url(url):
2405 return re.match(r'https?://[^?#&]+/', url).group()
2406
2407
2408 def urljoin(base, path):
2409 if isinstance(path, bytes):
2410 path = path.decode()
2411 if not isinstance(path, compat_str) or not path:
2412 return None
2413 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):  # NB: '-' last so it does not form a range
2414 return path
2415 if isinstance(base, bytes):
2416 base = base.decode()
2417 if not isinstance(base, compat_str) or not re.match(
2418 r'^(?:https?:)?//', base):
2419 return None
2420 return compat_urlparse.urljoin(base, path)
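
# Examples (illustrative): protocol-relative paths are returned unchanged
#   urljoin('https://example.com/a/', 'b/c')                  # -> 'https://example.com/a/b/c'
#   urljoin('https://example.com/a/', '//cdn.example.com/x')  # -> '//cdn.example.com/x'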
2421
2422
2423 class HEADRequest(compat_urllib_request.Request):
2424 def get_method(self):
2425 return 'HEAD'
2426
2427
2428 class PUTRequest(compat_urllib_request.Request):
2429 def get_method(self):
2430 return 'PUT'
2431
2432
2433 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2434 if get_attr and v is not None:
2435 v = getattr(v, get_attr, None)
2436 try:
2437 return int(v) * invscale // scale
2438 except (ValueError, TypeError, OverflowError):
2439 return default
2440
2441
2442 def str_or_none(v, default=None):
2443 return default if v is None else compat_str(v)
2444
2445
2446 def str_to_int(int_str):
2447 """ A more relaxed version of int_or_none """
2448 if isinstance(int_str, int):
2449 return int_str
2450 elif isinstance(int_str, compat_str):
2451 int_str = re.sub(r'[,\.\+]', '', int_str)
2452 return int_or_none(int_str)
2453
2454
2455 def float_or_none(v, scale=1, invscale=1, default=None):
2456 if v is None:
2457 return default
2458 try:
2459 return float(v) * invscale / scale
2460 except (ValueError, TypeError):
2461 return default
2462
2463
2464 def bool_or_none(v, default=None):
2465 return v if isinstance(v, bool) else default
2466
2467
2468 def strip_or_none(v, default=None):
2469 return v.strip() if isinstance(v, compat_str) else default
2470
2471
2472 def url_or_none(url):
2473 if not url or not isinstance(url, compat_str):
2474 return None
2475 url = url.strip()
2476 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2477
2478
2479 def request_to_url(req):
2480 if isinstance(req, compat_urllib_request.Request):
2481 return req.get_full_url()
2482 else:
2483 return req
2484
2485
2486 def strftime_or_none(timestamp, date_format, default=None):
2487 datetime_object = None
2488 try:
2489 if isinstance(timestamp, (int, float)): # unix timestamp
2490 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2491 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2492 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2493 return datetime_object.strftime(date_format)
2494 except (ValueError, TypeError, AttributeError):
2495 return default
2496
2497
2498 def parse_duration(s):
2499 if not isinstance(s, str):
2500 return None
2501 s = s.strip()
2502 if not s:
2503 return None
2504
2505 days, hours, mins, secs, ms = [None] * 5
2506 m = re.match(r'''(?x)
2507 (?P<before_secs>
2508 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2509 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2510 (?P<ms>[.:][0-9]+)?Z?$
2511 ''', s)
2512 if m:
2513 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2514 else:
2515 m = re.match(
2516 r'''(?ix)(?:P?
2517 (?:
2518 [0-9]+\s*y(?:ears?)?,?\s*
2519 )?
2520 (?:
2521 [0-9]+\s*m(?:onths?)?,?\s*
2522 )?
2523 (?:
2524 [0-9]+\s*w(?:eeks?)?,?\s*
2525 )?
2526 (?:
2527 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2528 )?
2529 T)?
2530 (?:
2531 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2532 )?
2533 (?:
2534 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2535 )?
2536 (?:
2537 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2538 )?Z?$''', s)
2539 if m:
2540 days, hours, mins, secs, ms = m.groups()
2541 else:
2542 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2543 if m:
2544 hours, mins = m.groups()
2545 else:
2546 return None
2547
2548 if ms:
2549 ms = ms.replace(':', '.')
2550 return sum(float(part or 0) * mult for part, mult in (
2551 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
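
# Examples (illustrative): both clock-style and verbose durations are supported
#   parse_duration('01:02:03.05')  # -> 3723.05
#   parse_duration('3 min')        # -> 180.0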
2552
2553
2554 def prepend_extension(filename, ext, expected_real_ext=None):
2555 name, real_ext = os.path.splitext(filename)
2556 return (
2557 f'{name}.{ext}{real_ext}'
2558 if not expected_real_ext or real_ext[1:] == expected_real_ext
2559 else f'{filename}.{ext}')
2560
2561
2562 def replace_extension(filename, ext, expected_real_ext=None):
2563 name, real_ext = os.path.splitext(filename)
2564 return '{}.{}'.format(
2565 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2566 ext)
2567
2568
2569 def check_executable(exe, args=[]):
2570 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2571 args can be a list of arguments for a short output (like -version) """
2572 try:
2573 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2574 except OSError:
2575 return False
2576 return exe
2577
2578
2579 def _get_exe_version_output(exe, args, *, to_screen=None):
2580 if to_screen:
2581 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2582 try:
2583 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2584 # SIGTTOU if yt-dlp is run in the background.
2585 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2586 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2587 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2588 except OSError:
2589 return False
2590 return stdout
2591
2592
2593 def detect_exe_version(output, version_re=None, unrecognized='present'):
2594 assert isinstance(output, compat_str)
2595 if version_re is None:
2596 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2597 m = re.search(version_re, output)
2598 if m:
2599 return m.group(1)
2600 else:
2601 return unrecognized
2602
2603
2604 def get_exe_version(exe, args=['--version'],
2605 version_re=None, unrecognized='present'):
2606 """ Returns the version of the specified executable,
2607 or False if the executable is not present """
2608 out = _get_exe_version_output(exe, args)
2609 return detect_exe_version(out, version_re, unrecognized) if out else False
2610
2611
2612 def frange(start=0, stop=None, step=1):
2613 """Float range"""
2614 if stop is None:
2615 start, stop = 0, start
2616 sign = [-1, 1][step > 0] if step else 0
2617 while sign * start < sign * stop:
2618 yield start
2619 start += step
2620
2621
2622 class LazyList(collections.abc.Sequence):
2623 """Lazy immutable list from an iterable
2624 Note that slices of a LazyList are lists and not LazyList"""
2625
2626 class IndexError(IndexError):
2627 pass
2628
2629 def __init__(self, iterable, *, reverse=False, _cache=None):
2630 self._iterable = iter(iterable)
2631 self._cache = [] if _cache is None else _cache
2632 self._reversed = reverse
2633
2634 def __iter__(self):
2635 if self._reversed:
2636 # We need to consume the entire iterable to iterate in reverse
2637 yield from self.exhaust()
2638 return
2639 yield from self._cache
2640 for item in self._iterable:
2641 self._cache.append(item)
2642 yield item
2643
2644 def _exhaust(self):
2645 self._cache.extend(self._iterable)
2646 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2647 return self._cache
2648
2649 def exhaust(self):
2650 """Evaluate the entire iterable"""
2651 return self._exhaust()[::-1 if self._reversed else 1]
2652
2653 @staticmethod
2654 def _reverse_index(x):
2655 return None if x is None else -(x + 1)
2656
2657 def __getitem__(self, idx):
2658 if isinstance(idx, slice):
2659 if self._reversed:
2660 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2661 start, stop, step = idx.start, idx.stop, idx.step or 1
2662 elif isinstance(idx, int):
2663 if self._reversed:
2664 idx = self._reverse_index(idx)
2665 start, stop, step = idx, idx, 0
2666 else:
2667 raise TypeError('indices must be integers or slices')
2668 if ((start or 0) < 0 or (stop or 0) < 0
2669 or (start is None and step < 0)
2670 or (stop is None and step > 0)):
2671 # We need to consume the entire iterable to be able to slice from the end
2672 # Obviously, never use this with infinite iterables
2673 self._exhaust()
2674 try:
2675 return self._cache[idx]
2676 except IndexError as e:
2677 raise self.IndexError(e) from e
2678 n = max(start or 0, stop or 0) - len(self._cache) + 1
2679 if n > 0:
2680 self._cache.extend(itertools.islice(self._iterable, n))
2681 try:
2682 return self._cache[idx]
2683 except IndexError as e:
2684 raise self.IndexError(e) from e
2685
2686 def __bool__(self):
2687 try:
2688 self[-1] if self._reversed else self[0]
2689 except self.IndexError:
2690 return False
2691 return True
2692
2693 def __len__(self):
2694 self._exhaust()
2695 return len(self._cache)
2696
2697 def __reversed__(self):
2698 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2699
2700 def __copy__(self):
2701 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2702
2703 def __repr__(self):
2704 # repr and str should mimic a list. So we exhaust the iterable
2705 return repr(self.exhaust())
2706
2707 def __str__(self):
2708 return repr(self.exhaust())
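
# Example (illustrative): only as much of the iterable as needed is consumed
#   lazy = LazyList(itertools.count())
#   lazy[5]   # -> 5 (consumes just the first six items)
#   lazy[:3]  # -> [0, 1, 2] (served from the cache)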
2709
2710
2711 class PagedList:
2712
2713 class IndexError(IndexError):
2714 pass
2715
2716 def __len__(self):
2717 # This is only useful for tests
2718 return len(self.getslice())
2719
2720 def __init__(self, pagefunc, pagesize, use_cache=True):
2721 self._pagefunc = pagefunc
2722 self._pagesize = pagesize
2723 self._pagecount = float('inf')
2724 self._use_cache = use_cache
2725 self._cache = {}
2726
2727 def getpage(self, pagenum):
2728 page_results = self._cache.get(pagenum)
2729 if page_results is None:
2730 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2731 if self._use_cache:
2732 self._cache[pagenum] = page_results
2733 return page_results
2734
2735 def getslice(self, start=0, end=None):
2736 return list(self._getslice(start, end))
2737
2738 def _getslice(self, start, end):
2739 raise NotImplementedError('This method must be implemented by subclasses')
2740
2741 def __getitem__(self, idx):
2742 assert self._use_cache, 'Indexing PagedList requires cache'
2743 if not isinstance(idx, int) or idx < 0:
2744 raise TypeError('indices must be non-negative integers')
2745 entries = self.getslice(idx, idx + 1)
2746 if not entries:
2747 raise self.IndexError()
2748 return entries[0]
2749
2750
2751 class OnDemandPagedList(PagedList):
2752 """Download pages until a page with less than maximum results"""
2753
2754 def _getslice(self, start, end):
2755 for pagenum in itertools.count(start // self._pagesize):
2756 firstid = pagenum * self._pagesize
2757 nextfirstid = pagenum * self._pagesize + self._pagesize
2758 if start >= nextfirstid:
2759 continue
2760
2761 startv = (
2762 start % self._pagesize
2763 if firstid <= start < nextfirstid
2764 else 0)
2765 endv = (
2766 ((end - 1) % self._pagesize) + 1
2767 if (end is not None and firstid <= end <= nextfirstid)
2768 else None)
2769
2770 try:
2771 page_results = self.getpage(pagenum)
2772 except Exception:
2773 self._pagecount = pagenum - 1
2774 raise
2775 if startv != 0 or endv is not None:
2776 page_results = page_results[startv:endv]
2777 yield from page_results
2778
2779 # A little optimization: if the current page is not "full", i.e. does
2780 # not contain page_size videos, then we can assume that this page
2781 # is the last one; there are no more ids on further pages,
2782 # so there is no need to query again.
2783 if len(page_results) + startv < self._pagesize:
2784 break
2785
2786 # If we got the whole page, but the next page is not interesting,
2787 # break out early as well
2788 if end == nextfirstid:
2789 break
2790
2791
2792 class InAdvancePagedList(PagedList):
2793 """PagedList with total number of pages known in advance"""
2794
2795 def __init__(self, pagefunc, pagecount, pagesize):
2796 PagedList.__init__(self, pagefunc, pagesize, True)
2797 self._pagecount = pagecount
2798
2799 def _getslice(self, start, end):
2800 start_page = start // self._pagesize
2801 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2802 skip_elems = start - start_page * self._pagesize
2803 only_more = None if end is None else end - start
2804 for pagenum in range(start_page, end_page):
2805 page_results = self.getpage(pagenum)
2806 if skip_elems:
2807 page_results = page_results[skip_elems:]
2808 skip_elems = None
2809 if only_more is not None:
2810 if len(page_results) < only_more:
2811 only_more -= len(page_results)
2812 else:
2813 yield from page_results[:only_more]
2814 break
2815 yield from page_results
2816
2817
2818 class PlaylistEntries:
2819 MissingEntry = object()
2820 is_exhausted = False
2821
2822 def __init__(self, ydl, info_dict):
2823 self.ydl, self.info_dict = ydl, info_dict
2824
2825 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2826 (?P<start>[+-]?\d+)?
2827 (?P<range>[:-]
2828 (?P<end>[+-]?\d+|inf(?:inite)?)?
2829 (?::(?P<step>[+-]?\d+))?
2830 )?''')
2831
2832 @classmethod
2833 def parse_playlist_items(cls, string):
2834 for segment in string.split(','):
2835 if not segment:
2836 raise ValueError('There are two or more consecutive commas')
2837 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2838 if not mobj:
2839 raise ValueError(f'{segment!r} is not a valid specification')
2840 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2841 if int_or_none(step) == 0:
2842 raise ValueError(f'Step in {segment!r} cannot be zero')
2843 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
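
    # Example (illustrative): the end of a range is parsed with float_or_none
    # so that 'inf' remains representable
    #   list(PlaylistEntries.parse_playlist_items('1:3,7,-5:'))
    #   -> [slice(1, 3.0, None), 7, slice(-5, None, None)]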
2844
2845 def get_requested_items(self):
2846 playlist_items = self.ydl.params.get('playlist_items')
2847 playlist_start = self.ydl.params.get('playliststart', 1)
2848 playlist_end = self.ydl.params.get('playlistend')
2849 # For backwards compatibility, interpret -1 as whole list
2850 if playlist_end in (-1, None):
2851 playlist_end = ''
2852 if not playlist_items:
2853 playlist_items = f'{playlist_start}:{playlist_end}'
2854 elif playlist_start != 1 or playlist_end:
2855 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2856
2857 for index in self.parse_playlist_items(playlist_items):
2858 for i, entry in self[index]:
2859 yield i, entry
2860 try:
2861 # TODO: Add auto-generated fields
2862 self.ydl._match_entry(entry, incomplete=True, silent=True)
2863 except (ExistingVideoReached, RejectedVideoReached):
2864 return
2865
2866 @property
2867 def full_count(self):
2868 if self.info_dict.get('playlist_count'):
2869 return self.info_dict['playlist_count']
2870 elif self.is_exhausted and not self.is_incomplete:
2871 return len(self)
2872 elif isinstance(self._entries, InAdvancePagedList):
2873 if self._entries._pagesize == 1:
2874 return self._entries._pagecount
2875
2876 @functools.cached_property
2877 def _entries(self):
2878 entries = self.info_dict.get('entries')
2879 if entries is None:
2880 raise EntryNotInPlaylist('There are no entries')
2881 elif isinstance(entries, list):
2882 self.is_exhausted = True
2883
2884 indices = self.info_dict.get('requested_entries')
2885 self.is_incomplete = bool(indices)
2886 if self.is_incomplete:
2887 assert self.is_exhausted
2888 ret = [self.MissingEntry] * max(indices)
2889 for i, entry in zip(indices, entries):
2890 ret[i - 1] = entry
2891 return ret
2892
2893 if isinstance(entries, (list, PagedList, LazyList)):
2894 return entries
2895 return LazyList(entries)
2896
2897 @functools.cached_property
2898 def _getter(self):
2899 if isinstance(self._entries, list):
2900 def get_entry(i):
2901 try:
2902 entry = self._entries[i]
2903 except IndexError:
2904 entry = self.MissingEntry
2905 if not self.is_incomplete:
2906 raise self.IndexError()
2907 if entry is self.MissingEntry:
2908 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2909 return entry
2910 else:
2911 def get_entry(i):
2912 try:
2913 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
2914 except (LazyList.IndexError, PagedList.IndexError):
2915 raise self.IndexError()
2916 return get_entry
2917
2918 def __getitem__(self, idx):
2919 if isinstance(idx, int):
2920 idx = slice(idx, idx)
2921
2922 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2923 step = 1 if idx.step is None else idx.step
2924 if idx.start is None:
2925 start = 0 if step > 0 else len(self) - 1
2926 else:
2927 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
2928
2929 # NB: Do not call len(self) when idx == [:]
2930 if idx.stop is None:
2931 stop = 0 if step < 0 else float('inf')
2932 else:
2933 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
2934 stop += [-1, 1][step > 0]
2935
2936 for i in frange(start, stop, step):
2937 if i < 0:
2938 continue
2939 try:
2940 try:
2941 entry = self._getter(i)
2942 except self.IndexError:
2943 self.is_exhausted = True
2944 if step > 0:
2945 break
2946 continue
2947 except IndexError:
2948 if self.is_exhausted:
2949 break
2950 raise
2951 yield i + 1, entry
2952
2953 def __len__(self):
2954 return len(tuple(self[:]))
2955
2956 class IndexError(IndexError):
2957 pass
2958
2959
2960 def uppercase_escape(s):
2961 unicode_escape = codecs.getdecoder('unicode_escape')
2962 return re.sub(
2963 r'\\U[0-9a-fA-F]{8}',
2964 lambda m: unicode_escape(m.group(0))[0],
2965 s)
2966
2967
2968 def lowercase_escape(s):
2969 unicode_escape = codecs.getdecoder('unicode_escape')
2970 return re.sub(
2971 r'\\u[0-9a-fA-F]{4}',
2972 lambda m: unicode_escape(m.group(0))[0],
2973 s)
2974
2975
2976 def escape_rfc3986(s):
2977 """Escape non-ASCII characters as suggested by RFC 3986"""
2978 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2979
2980
2981 def escape_url(url):
2982 """Escape URL as suggested by RFC 3986"""
2983 url_parsed = compat_urllib_parse_urlparse(url)
2984 return url_parsed._replace(
2985 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2986 path=escape_rfc3986(url_parsed.path),
2987 params=escape_rfc3986(url_parsed.params),
2988 query=escape_rfc3986(url_parsed.query),
2989 fragment=escape_rfc3986(url_parsed.fragment)
2990 ).geturl()
2991
2992
2993 def parse_qs(url):
2994 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2995
2996
2997 def read_batch_urls(batch_fd):
2998 def fixup(url):
2999 if not isinstance(url, compat_str):
3000 url = url.decode('utf-8', 'replace')
3001 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3002 for bom in BOM_UTF8:
3003 if url.startswith(bom):
3004 url = url[len(bom):]
3005 url = url.lstrip()
3006 if not url or url.startswith(('#', ';', ']')):
3007 return False
3008 # "#" cannot be stripped out since it is part of the URI
3009 # However, it can be safely stripped out if it follows a whitespace
3010 return re.split(r'\s#', url, 1)[0].rstrip()
3011
3012 with contextlib.closing(batch_fd) as fd:
3013 return [url for url in map(fixup, fd) if url]
3014
3015
3016 def urlencode_postdata(*args, **kargs):
3017 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3018
3019
3020 def update_url_query(url, query):
3021 if not query:
3022 return url
3023 parsed_url = compat_urlparse.urlparse(url)
3024 qs = compat_parse_qs(parsed_url.query)
3025 qs.update(query)
3026 return compat_urlparse.urlunparse(parsed_url._replace(
3027 query=compat_urllib_parse_urlencode(qs, True)))
3028
3029
3030 def update_Request(req, url=None, data=None, headers={}, query={}):
3031 req_headers = req.headers.copy()
3032 req_headers.update(headers)
3033 req_data = data or req.data
3034 req_url = update_url_query(url or req.get_full_url(), query)
3035 req_get_method = req.get_method()
3036 if req_get_method == 'HEAD':
3037 req_type = HEADRequest
3038 elif req_get_method == 'PUT':
3039 req_type = PUTRequest
3040 else:
3041 req_type = compat_urllib_request.Request
3042 new_req = req_type(
3043 req_url, data=req_data, headers=req_headers,
3044 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3045 if hasattr(req, 'timeout'):
3046 new_req.timeout = req.timeout
3047 return new_req
3048
3049
3050 def _multipart_encode_impl(data, boundary):
3051 content_type = 'multipart/form-data; boundary=%s' % boundary
3052
3053 out = b''
3054 for k, v in data.items():
3055 out += b'--' + boundary.encode('ascii') + b'\r\n'
3056 if isinstance(k, compat_str):
3057 k = k.encode()
3058 if isinstance(v, compat_str):
3059 v = v.encode()
3060 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3061 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3062 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3063 if boundary.encode('ascii') in content:
3064 raise ValueError('Boundary overlaps with data')
3065 out += content
3066
3067 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3068
3069 return out, content_type
3070
3071
3072 def multipart_encode(data, boundary=None):
3073 '''
3074 Encode a dict to RFC 7578-compliant form-data
3075
3076 data:
3077 A dict where keys and values can be either Unicode or bytes-like
3078 objects.
3079 boundary:
3080 If specified, a Unicode object to use as the boundary. Otherwise
3081 a random boundary is generated.
3082
3083 Reference: https://tools.ietf.org/html/rfc7578
3084 '''
3085 has_specified_boundary = boundary is not None
3086
3087 while True:
3088 if boundary is None:
3089 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3090
3091 try:
3092 out, content_type = _multipart_encode_impl(data, boundary)
3093 break
3094 except ValueError:
3095 if has_specified_boundary:
3096 raise
3097 boundary = None
3098
3099 return out, content_type
3100
3101
3102 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3103 for val in map(d.get, variadic(key_or_keys)):
3104 if val is not None and (val or not skip_false_values):
3105 return val
3106 return default
3107
3108
3109 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3110 for f in funcs:
3111 try:
3112 val = f(*args, **kwargs)
3113 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
3114 pass
3115 else:
3116 if expected_type is None or isinstance(val, expected_type):
3117 return val
3118
3119
3120 def try_get(src, getter, expected_type=None):
3121 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3122
3123
3124 def filter_dict(dct, cndn=lambda _, v: v is not None):
3125 return {k: v for k, v in dct.items() if cndn(k, v)}
3126
3127
3128 def merge_dicts(*dicts):
3129 merged = {}
3130 for a_dict in dicts:
3131 for k, v in a_dict.items():
3132 if (v is not None and k not in merged
3133 or isinstance(v, str) and merged[k] == ''):
3134 merged[k] = v
3135 return merged
3136
3137
3138 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3139 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
3140
3141
3142 US_RATINGS = {
3143 'G': 0,
3144 'PG': 10,
3145 'PG-13': 13,
3146 'R': 16,
3147 'NC': 18,
3148 }
3149
3150
3151 TV_PARENTAL_GUIDELINES = {
3152 'TV-Y': 0,
3153 'TV-Y7': 7,
3154 'TV-G': 0,
3155 'TV-PG': 0,
3156 'TV-14': 14,
3157 'TV-MA': 17,
3158 }
3159
3160
3161 def parse_age_limit(s):
3162 # isinstance(False, int) is True. So type() must be used instead
3163 if type(s) is int: # noqa: E721
3164 return s if 0 <= s <= 21 else None
3165 elif not isinstance(s, str):
3166 return None
3167 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3168 if m:
3169 return int(m.group('age'))
3170 s = s.upper()
3171 if s in US_RATINGS:
3172 return US_RATINGS[s]
3173 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3174 if m:
3175 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3176 return None
3177
3178
3179 def strip_jsonp(code):
3180 return re.sub(
3181 r'''(?sx)^
3182 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3183 (?:\s*&&\s*(?P=func_name))?
3184 \s*\(\s*(?P<callback_data>.*)\);?
3185 \s*?(?://[^\n]*)*$''',
3186 r'\g<callback_data>', code)
3187
3188
3189 def js_to_json(code, vars={}):
3190 # vars is a dict of var, val pairs to substitute
3191 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3192 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3193 INTEGER_TABLE = (
3194 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3195 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3196 )
3197
3198 def fix_kv(m):
3199 v = m.group(0)
3200 if v in ('true', 'false', 'null'):
3201 return v
3202 elif v in ('undefined', 'void 0'):
3203 return 'null'
3204 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3205 return ""
3206
3207 if v[0] in ("'", '"'):
3208 v = re.sub(r'(?s)\\.|"', lambda m: {
3209 '"': '\\"',
3210 "\\'": "'",
3211 '\\\n': '',
3212 '\\x': '\\u00',
3213 }.get(m.group(0), m.group(0)), v[1:-1])
3214 else:
3215 for regex, base in INTEGER_TABLE:
3216 im = re.match(regex, v)
3217 if im:
3218 i = int(im.group(1), base)
3219 return '"%d":' % i if v.endswith(':') else '%d' % i
3220
3221 if v in vars:
3222 return vars[v]
3223
3224 return '"%s"' % v
3225
3226 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3227
3228 return re.sub(r'''(?sx)
3229 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3230 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3231 {comment}|,(?={skip}[\]}}])|
3232 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3233 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3234 [0-9]+(?={skip}:)|
3235 !+
3236 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
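
# Example (illustrative): bare keys are quoted and hex literals are converted
# to decimal
#   js_to_json("{abc: 'def', xyz: 0x10}")  # -> '{"abc": "def", "xyz": 16}'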
3237
3238
3239 def qualities(quality_ids):
3240 """ Get a numeric quality value out of a list of possible values """
3241 def q(qid):
3242 try:
3243 return quality_ids.index(qid)
3244 except ValueError:
3245 return -1
3246 return q
3247
3248
3249 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
3250
3251
3252 DEFAULT_OUTTMPL = {
3253 'default': '%(title)s [%(id)s].%(ext)s',
3254 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3255 }
3256 OUTTMPL_TYPES = {
3257 'chapter': None,
3258 'subtitle': None,
3259 'thumbnail': None,
3260 'description': 'description',
3261 'annotation': 'annotations.xml',
3262 'infojson': 'info.json',
3263 'link': None,
3264 'pl_video': None,
3265 'pl_thumbnail': None,
3266 'pl_description': 'description',
3267 'pl_infojson': 'info.json',
3268 }
3269
3270 # As of [1] format syntax is:
3271 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3272 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3273 STR_FORMAT_RE_TMPL = r'''(?x)
3274 (?<!%)(?P<prefix>(?:%%)*)
3275 %
3276 (?P<has_key>\((?P<key>{0})\))?
3277 (?P<format>
3278 (?P<conversion>[#0\-+ ]+)?
3279 (?P<min_width>\d+)?
3280 (?P<precision>\.\d+)?
3281 (?P<len_mod>[hlL])? # unused in python
3282 {1} # conversion type
3283 )
3284 '''
3285
3286
3287 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3288
3289
3290 def limit_length(s, length):
3291 """ Add ellipses to overly long strings """
3292 if s is None:
3293 return None
3294 ELLIPSES = '...'
3295 if len(s) > length:
3296 return s[:length - len(ELLIPSES)] + ELLIPSES
3297 return s
3298
3299
3300 def version_tuple(v):
3301 return tuple(int(e) for e in re.split(r'[-.]', v))
3302
3303
3304 def is_outdated_version(version, limit, assume_new=True):
3305 if not version:
3306 return not assume_new
3307 try:
3308 return version_tuple(version) < version_tuple(limit)
3309 except ValueError:
3310 return not assume_new
3311
3312
3313 def ytdl_is_updateable():
3314 """ Returns if yt-dlp can be updated with -U """
3315
3316 from .update import is_non_updateable
3317
3318 return not is_non_updateable()
3319
3320
3321 def args_to_str(args):
3322 # Get a short string representation for a subprocess command
3323 return ' '.join(compat_shlex_quote(a) for a in args)
3324
3325
3326 def error_to_compat_str(err):
3327 return str(err)
3328
3329
3330 def error_to_str(err):
3331 return f'{type(err).__name__}: {err}'
3332
3333
3334 def mimetype2ext(mt):
3335 if mt is None:
3336 return None
3337
3338 mt, _, params = mt.partition(';')
3339 mt = mt.strip()
3340
3341 FULL_MAP = {
3342 'audio/mp4': 'm4a',
3343 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3
3344 # here since it is the most popular one
3345 'audio/mpeg': 'mp3',
3346 'audio/x-wav': 'wav',
3347 'audio/wav': 'wav',
3348 'audio/wave': 'wav',
3349 }
3350
3351 ext = FULL_MAP.get(mt)
3352 if ext is not None:
3353 return ext
3354
3355 SUBTYPE_MAP = {
3356 '3gpp': '3gp',
3357 'smptett+xml': 'tt',
3358 'ttaf+xml': 'dfxp',
3359 'ttml+xml': 'ttml',
3360 'x-flv': 'flv',
3361 'x-mp4-fragmented': 'mp4',
3362 'x-ms-sami': 'sami',
3363 'x-ms-wmv': 'wmv',
3364 'mpegurl': 'm3u8',
3365 'x-mpegurl': 'm3u8',
3366 'vnd.apple.mpegurl': 'm3u8',
3367 'dash+xml': 'mpd',
3368 'f4m+xml': 'f4m',
3369 'hds+xml': 'f4m',
3370 'vnd.ms-sstr+xml': 'ism',
3371 'quicktime': 'mov',
3372 'mp2t': 'ts',
3373 'x-wav': 'wav',
3374 'filmstrip+json': 'fs',
3375 'svg+xml': 'svg',
3376 }
3377
3378 _, _, subtype = mt.rpartition('/')
3379 ext = SUBTYPE_MAP.get(subtype.lower())
3380 if ext is not None:
3381 return ext
3382
3383 SUFFIX_MAP = {
3384 'json': 'json',
3385 'xml': 'xml',
3386 'zip': 'zip',
3387 'gzip': 'gz',
3388 }
3389
3390 _, _, suffix = subtype.partition('+')
3391 ext = SUFFIX_MAP.get(suffix)
3392 if ext is not None:
3393 return ext
3394
3395 return subtype.replace('+', '.')
3396
3397
3398 def ext2mimetype(ext_or_url):
3399 if not ext_or_url:
3400 return None
3401 if '.' not in ext_or_url:
3402 ext_or_url = f'file.{ext_or_url}'
3403 return mimetypes.guess_type(ext_or_url)[0]
3404
3405
3406 def parse_codecs(codecs_str):
3407 # http://tools.ietf.org/html/rfc6381
3408 if not codecs_str:
3409 return {}
3410 split_codecs = list(filter(None, map(
3411 str.strip, codecs_str.strip().strip(',').split(','))))
3412 vcodec, acodec, scodec, hdr = None, None, None, None
3413 for full_codec in split_codecs:
3414 parts = full_codec.split('.')
3415 codec = parts[0].replace('0', '')
3416 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3417 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3418 if not vcodec:
3419 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3420 if codec in ('dvh1', 'dvhe'):
3421 hdr = 'DV'
3422 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3423 hdr = 'HDR10'
3424 elif full_codec.replace('0', '').startswith('vp9.2'):
3425 hdr = 'HDR10'
3426 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3427 if not acodec:
3428 acodec = full_codec
3429 elif codec in ('stpp', 'wvtt',):
3430 if not scodec:
3431 scodec = full_codec
3432 else:
3433 write_string(f'WARNING: Unknown codec {full_codec}\n')
3434 if vcodec or acodec or scodec:
3435 return {
3436 'vcodec': vcodec or 'none',
3437 'acodec': acodec or 'none',
3438 'dynamic_range': hdr,
3439 **({'scodec': scodec} if scodec is not None else {}),
3440 }
3441 elif len(split_codecs) == 2:
3442 return {
3443 'vcodec': split_codecs[0],
3444 'acodec': split_codecs[1],
3445 }
3446 return {}
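
# Example (illustrative):
#   parse_codecs('avc1.64001f, mp4a.40.2')
#   -> {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}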
3447
3448
3449 def urlhandle_detect_ext(url_handle):
3450 getheader = url_handle.headers.get
3451
3452 cd = getheader('Content-Disposition')
3453 if cd:
3454 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3455 if m:
3456 e = determine_ext(m.group('filename'), default_ext=None)
3457 if e:
3458 return e
3459
3460 return mimetype2ext(getheader('Content-Type'))
3461
3462
3463 def encode_data_uri(data, mime_type):
3464 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3465
3466
3467 def age_restricted(content_limit, age_limit):
3468 """ Returns True iff the content should be blocked """
3469
3470 if age_limit is None: # No limit set
3471 return False
3472 if content_limit is None:
3473 return False # Content available for everyone
3474 return age_limit < content_limit
3475
3476
3477 def is_html(first_bytes):
3478 """ Detect whether a file contains HTML by examining its first bytes. """
3479
3480 BOMS = [
3481 (b'\xef\xbb\xbf', 'utf-8'),
3482 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3483 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3484 (b'\xff\xfe', 'utf-16-le'),
3485 (b'\xfe\xff', 'utf-16-be'),
3486 ]
3487
3488 encoding = 'utf-8'
3489 for bom, enc in BOMS:
3490 while first_bytes.startswith(bom):
3491 encoding, first_bytes = enc, first_bytes[len(bom):]
3492
3493 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3494
3495
3496 def determine_protocol(info_dict):
3497 protocol = info_dict.get('protocol')
3498 if protocol is not None:
3499 return protocol
3500
3501 url = sanitize_url(info_dict['url'])
3502 if url.startswith('rtmp'):
3503 return 'rtmp'
3504 elif url.startswith('mms'):
3505 return 'mms'
3506 elif url.startswith('rtsp'):
3507 return 'rtsp'
3508
3509 ext = determine_ext(url)
3510 if ext == 'm3u8':
3511 return 'm3u8'
3512 elif ext == 'f4m':
3513 return 'f4m'
3514
3515 return compat_urllib_parse_urlparse(url).scheme
3516
3517
3518 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3519 """ Render a list of rows, each as a list of values.
3520 Text after a \t will be right aligned """
3521 def width(string):
3522 return len(remove_terminal_sequences(string).replace('\t', ''))
3523
3524 def get_max_lens(table):
3525 return [max(width(str(v)) for v in col) for col in zip(*table)]
3526
3527 def filter_using_list(row, filterArray):
3528 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3529
3530 max_lens = get_max_lens(data) if hide_empty else []
3531 header_row = filter_using_list(header_row, max_lens)
3532 data = [filter_using_list(row, max_lens) for row in data]
3533
3534 table = [header_row] + data
3535 max_lens = get_max_lens(table)
3536 extra_gap += 1
3537 if delim:
3538 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3539 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3540 for row in table:
3541 for pos, text in enumerate(map(str, row)):
3542 if '\t' in text:
3543 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3544 else:
3545 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3546 ret = '\n'.join(''.join(row).rstrip() for row in table)
3547 return ret
3548
3549
3550 def _match_one(filter_part, dct, incomplete):
3551 # TODO: Generalize code with YoutubeDL._build_format_filter
3552 STRING_OPERATORS = {
3553 '*=': operator.contains,
3554 '^=': lambda attr, value: attr.startswith(value),
3555 '$=': lambda attr, value: attr.endswith(value),
3556 '~=': lambda attr, value: re.search(value, attr),
3557 }
3558 COMPARISON_OPERATORS = {
3559 **STRING_OPERATORS,
3560 '<=': operator.le, # "<=" must be defined above "<"
3561 '<': operator.lt,
3562 '>=': operator.ge,
3563 '>': operator.gt,
3564 '=': operator.eq,
3565 }
3566
3567 if isinstance(incomplete, bool):
3568 is_incomplete = lambda _: incomplete
3569 else:
3570 is_incomplete = lambda k: k in incomplete
3571
3572 operator_rex = re.compile(r'''(?x)
3573 (?P<key>[a-z_]+)
3574 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3575 (?:
3576 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3577 (?P<strval>.+?)
3578 )
3579 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3580 m = operator_rex.fullmatch(filter_part.strip())
3581 if m:
3582 m = m.groupdict()
3583 unnegated_op = COMPARISON_OPERATORS[m['op']]
3584 if m['negation']:
3585 op = lambda attr, value: not unnegated_op(attr, value)
3586 else:
3587 op = unnegated_op
3588 comparison_value = m['quotedstrval'] or m['strval']  # the regex defines no 'intval' group
3589 if m['quote']:
3590 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3591 actual_value = dct.get(m['key'])
3592 numeric_comparison = None
3593 if isinstance(actual_value, (int, float)):
3594 # If the original field is numeric, try to interpret the comparison
3595 # value as a number too (including filesize/duration suffixes). If the
3596 # original field is a string, the comparison value is kept as a string
3597 # (see https://github.com/ytdl-org/youtube-dl/issues/11082)
3598 try:
3599 numeric_comparison = int(comparison_value)
3600 except ValueError:
3601 numeric_comparison = parse_filesize(comparison_value)
3602 if numeric_comparison is None:
3603 numeric_comparison = parse_filesize(f'{comparison_value}B')
3604 if numeric_comparison is None:
3605 numeric_comparison = parse_duration(comparison_value)
3606 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3607 raise ValueError('Operator %s only supports string values!' % m['op'])
3608 if actual_value is None:
3609 return is_incomplete(m['key']) or m['none_inclusive']
3610 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3611
3612 UNARY_OPERATORS = {
3613 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3614 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3615 }
3616 operator_rex = re.compile(r'''(?x)
3617 (?P<op>%s)\s*(?P<key>[a-z_]+)
3618 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3619 m = operator_rex.fullmatch(filter_part.strip())
3620 if m:
3621 op = UNARY_OPERATORS[m.group('op')]
3622 actual_value = dct.get(m.group('key'))
3623 if is_incomplete(m.group('key')) and actual_value is None:
3624 return True
3625 return op(actual_value)
3626
3627 raise ValueError('Invalid filter part %r' % filter_part)
3628
3629
3630 def match_str(filter_str, dct, incomplete=False):
3631 """ Filter a dictionary with a simple string syntax.
3632 @returns Whether the filter passes
3633 @param incomplete Set of keys that are expected to be missing from dct.
3634 Can be True/False to indicate that all/none of the keys may be missing.
3635 All conditions on incomplete keys pass if the key is missing.
3636 """
3637 return all(
3638 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3639 for filter_part in re.split(r'(?<!\\)&', filter_str))
3640
3641
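# A small, self-contained illustration of the filter syntax accepted by
# match_str (illustrative values only; nothing in the module calls this):
def _match_str_examples():
    assert match_str('duration > 60 & title ~= dog', {'duration': 90, 'title': 'dog video'})
    assert match_str('!is_live', {'is_live': False})
    # a '?' after the operator lets the condition pass when the key is missing
    assert match_str('like_count >? 100', {})

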
3642 def match_filter_func(filters):
3643 if not filters:
3644 return None
3645 filters = set(variadic(filters))
3646
3647 interactive = '-' in filters
3648 if interactive:
3649 filters.remove('-')
3650
3651 def _match_func(info_dict, incomplete=False):
3652 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3653 return NO_DEFAULT if interactive and not incomplete else None
3654 else:
3655 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3656 filter_str = ') | ('.join(map(str.strip, filters))
3657 return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3658 return _match_func
3659
3660
3661 def download_range_func(chapters, ranges):
3662 def inner(info_dict, ydl):
3663 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3664 else 'Cannot match chapters since chapter information is unavailable')
3665 for regex in chapters or []:
3666 for i, chapter in enumerate(info_dict.get('chapters') or []):
3667 if re.search(regex, chapter['title']):
3668 warning = None
3669 yield {**chapter, 'index': i}
3670 if chapters and warning:
3671 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3672
3673 yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
3674
3675 return inner
3676
3677
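# A small sketch of how download_range_func behaves (illustrative values;
# with no chapter regexes, the `ydl` argument is never touched):
#
#   ranges = download_range_func(None, [(10, 20)])
#   list(ranges({}, None))
#   # ==> [{'start_time': 10, 'end_time': 20}]
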
3678 def parse_dfxp_time_expr(time_expr):
3679 if not time_expr:
3680 return
3681
3682 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3683 if mobj:
3684 return float(mobj.group('time_offset'))
3685
3686 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3687 if mobj:
3688 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3689
3690
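# Usage sketch for parse_dfxp_time_expr (illustrative values):
#
#   parse_dfxp_time_expr('5.2s')        # ==> 5.2
#   parse_dfxp_time_expr('00:01:30.5')  # ==> 90.5
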
3691 def srt_subtitles_timecode(seconds):
3692 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3693
3694
3695 def ass_subtitles_timecode(seconds):
3696 time = timetuple_from_msec(seconds * 1000)
3697 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3698
3699
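# Usage sketch for the subtitle timecode helpers (illustrative values,
# assuming timetuple_from_msec, defined earlier in this file, splits into
# (hours, minutes, seconds, milliseconds)):
#
#   srt_subtitles_timecode(3661.5)  # ==> '01:01:01,500'
#   ass_subtitles_timecode(3661.5)  # ==> '1:01:01.50'
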
3700 def dfxp2srt(dfxp_data):
3701 '''
3702 @param dfxp_data A bytes-like object containing DFXP data
3703 @returns A unicode object containing converted SRT data
3704 '''
3705 LEGACY_NAMESPACES = (
3706 (b'http://www.w3.org/ns/ttml', [
3707 b'http://www.w3.org/2004/11/ttaf1',
3708 b'http://www.w3.org/2006/04/ttaf1',
3709 b'http://www.w3.org/2006/10/ttaf1',
3710 ]),
3711 (b'http://www.w3.org/ns/ttml#styling', [
3712 b'http://www.w3.org/ns/ttml#style',
3713 ]),
3714 )
3715
3716 SUPPORTED_STYLING = [
3717 'color',
3718 'fontFamily',
3719 'fontSize',
3720 'fontStyle',
3721 'fontWeight',
3722 'textDecoration'
3723 ]
3724
3725 _x = functools.partial(xpath_with_ns, ns_map={
3726 'xml': 'http://www.w3.org/XML/1998/namespace',
3727 'ttml': 'http://www.w3.org/ns/ttml',
3728 'tts': 'http://www.w3.org/ns/ttml#styling',
3729 })
3730
3731 styles = {}
3732 default_style = {}
3733
3734 class TTMLPElementParser:
3735 _out = ''
3736 _unclosed_elements = []
3737 _applied_styles = []
3738
3739 def start(self, tag, attrib):
3740 if tag in (_x('ttml:br'), 'br'):
3741 self._out += '\n'
3742 else:
3743 unclosed_elements = []
3744 style = {}
3745 element_style_id = attrib.get('style')
3746 if default_style:
3747 style.update(default_style)
3748 if element_style_id:
3749 style.update(styles.get(element_style_id, {}))
3750 for prop in SUPPORTED_STYLING:
3751 prop_val = attrib.get(_x('tts:' + prop))
3752 if prop_val:
3753 style[prop] = prop_val
3754 if style:
3755 font = ''
3756 for k, v in sorted(style.items()):
3757 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3758 continue
3759 if k == 'color':
3760 font += ' color="%s"' % v
3761 elif k == 'fontSize':
3762 font += ' size="%s"' % v
3763 elif k == 'fontFamily':
3764 font += ' face="%s"' % v
3765 elif k == 'fontWeight' and v == 'bold':
3766 self._out += '<b>'
3767 unclosed_elements.append('b')
3768 elif k == 'fontStyle' and v == 'italic':
3769 self._out += '<i>'
3770 unclosed_elements.append('i')
3771 elif k == 'textDecoration' and v == 'underline':
3772 self._out += '<u>'
3773 unclosed_elements.append('u')
3774 if font:
3775 self._out += '<font' + font + '>'
3776 unclosed_elements.append('font')
3777 applied_style = {}
3778 if self._applied_styles:
3779 applied_style.update(self._applied_styles[-1])
3780 applied_style.update(style)
3781 self._applied_styles.append(applied_style)
3782 self._unclosed_elements.append(unclosed_elements)
3783
3784 def end(self, tag):
3785 if tag not in (_x('ttml:br'), 'br'):
3786 unclosed_elements = self._unclosed_elements.pop()
3787 for element in reversed(unclosed_elements):
3788 self._out += '</%s>' % element
3789 if unclosed_elements and self._applied_styles:
3790 self._applied_styles.pop()
3791
3792 def data(self, data):
3793 self._out += data
3794
3795 def close(self):
3796 return self._out.strip()
3797
3798 def parse_node(node):
3799 target = TTMLPElementParser()
3800 parser = xml.etree.ElementTree.XMLParser(target=target)
3801 parser.feed(xml.etree.ElementTree.tostring(node))
3802 return parser.close()
3803
3804 for k, v in LEGACY_NAMESPACES:
3805 for ns in v:
3806 dfxp_data = dfxp_data.replace(ns, k)
3807
3808 dfxp = compat_etree_fromstring(dfxp_data)
3809 out = []
3810 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3811
3812 if not paras:
3813 raise ValueError('Invalid dfxp/TTML subtitle')
3814
3815 repeat = False
3816 while True:
3817 for style in dfxp.findall(_x('.//ttml:style')):
3818 style_id = style.get('id') or style.get(_x('xml:id'))
3819 if not style_id:
3820 continue
3821 parent_style_id = style.get('style')
3822 if parent_style_id:
3823 if parent_style_id not in styles:
3824 repeat = True
3825 continue
3826 styles[style_id] = styles[parent_style_id].copy()
3827 for prop in SUPPORTED_STYLING:
3828 prop_val = style.get(_x('tts:' + prop))
3829 if prop_val:
3830 styles.setdefault(style_id, {})[prop] = prop_val
3831 if repeat:
3832 repeat = False
3833 else:
3834 break
3835
3836 for p in ('body', 'div'):
3837 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3838 if ele is None:
3839 continue
3840 style = styles.get(ele.get('style'))
3841 if not style:
3842 continue
3843 default_style.update(style)
3844
3845 for para, index in zip(paras, itertools.count(1)):
3846 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3847 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3848 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3849 if begin_time is None:
3850 continue
3851 if not end_time:
3852 if not dur:
3853 continue
3854 end_time = begin_time + dur
3855 out.append('%d\n%s --> %s\n%s\n\n' % (
3856 index,
3857 srt_subtitles_timecode(begin_time),
3858 srt_subtitles_timecode(end_time),
3859 parse_node(para)))
3860
3861 return ''.join(out)
3862
3863
3864 def cli_option(params, command_option, param, separator=None):
3865 param = params.get(param)
3866 return ([] if param is None
3867 else [command_option, str(param)] if separator is None
3868 else [f'{command_option}{separator}{param}'])
3869
3870
3871 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3872 param = params.get(param)
3873 assert param in (True, False, None)
3874 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3875
3876
3877 def cli_valueless_option(params, command_option, param, expected_value=True):
3878 return [command_option] if params.get(param) == expected_value else []
3879
3880
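# Usage sketch for the cli_* helpers (illustrative values):
#
#   cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#   # ==> ['--proxy', '127.0.0.1:3128']
#   cli_bool_option({'check': True}, '--check', 'check')
#   # ==> ['--check', 'true']
#   cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#   # ==> ['--quiet']
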
3881 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3882 if isinstance(argdict, (list, tuple)): # for backward compatibility
3883 if use_compat:
3884 return argdict
3885 else:
3886 argdict = None
3887 if argdict is None:
3888 return default
3889 assert isinstance(argdict, dict)
3890
3891 assert isinstance(keys, (list, tuple))
3892 for key_list in keys:
3893 arg_list = list(filter(
3894 lambda x: x is not None,
3895 [argdict.get(key.lower()) for key in variadic(key_list)]))
3896 if arg_list:
3897 return [arg for args in arg_list for arg in args]
3898 return default
3899
3900
3901 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3902 main_key, exe = main_key.lower(), exe.lower()
3903 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3904 keys = [f'{root_key}{k}' for k in (keys or [''])]
3905 if root_key in keys:
3906 if main_key != exe:
3907 keys.append((main_key, exe))
3908 keys.append('default')
3909 else:
3910 use_compat = False
3911 return cli_configuration_args(argdict, keys, default, use_compat)
3912
3913
3914 class ISO639Utils:
3915 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3916 _lang_map = {
3917 'aa': 'aar',
3918 'ab': 'abk',
3919 'ae': 'ave',
3920 'af': 'afr',
3921 'ak': 'aka',
3922 'am': 'amh',
3923 'an': 'arg',
3924 'ar': 'ara',
3925 'as': 'asm',
3926 'av': 'ava',
3927 'ay': 'aym',
3928 'az': 'aze',
3929 'ba': 'bak',
3930 'be': 'bel',
3931 'bg': 'bul',
3932 'bh': 'bih',
3933 'bi': 'bis',
3934 'bm': 'bam',
3935 'bn': 'ben',
3936 'bo': 'bod',
3937 'br': 'bre',
3938 'bs': 'bos',
3939 'ca': 'cat',
3940 'ce': 'che',
3941 'ch': 'cha',
3942 'co': 'cos',
3943 'cr': 'cre',
3944 'cs': 'ces',
3945 'cu': 'chu',
3946 'cv': 'chv',
3947 'cy': 'cym',
3948 'da': 'dan',
3949 'de': 'deu',
3950 'dv': 'div',
3951 'dz': 'dzo',
3952 'ee': 'ewe',
3953 'el': 'ell',
3954 'en': 'eng',
3955 'eo': 'epo',
3956 'es': 'spa',
3957 'et': 'est',
3958 'eu': 'eus',
3959 'fa': 'fas',
3960 'ff': 'ful',
3961 'fi': 'fin',
3962 'fj': 'fij',
3963 'fo': 'fao',
3964 'fr': 'fra',
3965 'fy': 'fry',
3966 'ga': 'gle',
3967 'gd': 'gla',
3968 'gl': 'glg',
3969 'gn': 'grn',
3970 'gu': 'guj',
3971 'gv': 'glv',
3972 'ha': 'hau',
3973 'he': 'heb',
3974 'iw': 'heb', # Replaced by he in 1989 revision
3975 'hi': 'hin',
3976 'ho': 'hmo',
3977 'hr': 'hrv',
3978 'ht': 'hat',
3979 'hu': 'hun',
3980 'hy': 'hye',
3981 'hz': 'her',
3982 'ia': 'ina',
3983 'id': 'ind',
3984 'in': 'ind', # Replaced by id in 1989 revision
3985 'ie': 'ile',
3986 'ig': 'ibo',
3987 'ii': 'iii',
3988 'ik': 'ipk',
3989 'io': 'ido',
3990 'is': 'isl',
3991 'it': 'ita',
3992 'iu': 'iku',
3993 'ja': 'jpn',
3994 'jv': 'jav',
3995 'ka': 'kat',
3996 'kg': 'kon',
3997 'ki': 'kik',
3998 'kj': 'kua',
3999 'kk': 'kaz',
4000 'kl': 'kal',
4001 'km': 'khm',
4002 'kn': 'kan',
4003 'ko': 'kor',
4004 'kr': 'kau',
4005 'ks': 'kas',
4006 'ku': 'kur',
4007 'kv': 'kom',
4008 'kw': 'cor',
4009 'ky': 'kir',
4010 'la': 'lat',
4011 'lb': 'ltz',
4012 'lg': 'lug',
4013 'li': 'lim',
4014 'ln': 'lin',
4015 'lo': 'lao',
4016 'lt': 'lit',
4017 'lu': 'lub',
4018 'lv': 'lav',
4019 'mg': 'mlg',
4020 'mh': 'mah',
4021 'mi': 'mri',
4022 'mk': 'mkd',
4023 'ml': 'mal',
4024 'mn': 'mon',
4025 'mr': 'mar',
4026 'ms': 'msa',
4027 'mt': 'mlt',
4028 'my': 'mya',
4029 'na': 'nau',
4030 'nb': 'nob',
4031 'nd': 'nde',
4032 'ne': 'nep',
4033 'ng': 'ndo',
4034 'nl': 'nld',
4035 'nn': 'nno',
4036 'no': 'nor',
4037 'nr': 'nbl',
4038 'nv': 'nav',
4039 'ny': 'nya',
4040 'oc': 'oci',
4041 'oj': 'oji',
4042 'om': 'orm',
4043 'or': 'ori',
4044 'os': 'oss',
4045 'pa': 'pan',
4046 'pi': 'pli',
4047 'pl': 'pol',
4048 'ps': 'pus',
4049 'pt': 'por',
4050 'qu': 'que',
4051 'rm': 'roh',
4052 'rn': 'run',
4053 'ro': 'ron',
4054 'ru': 'rus',
4055 'rw': 'kin',
4056 'sa': 'san',
4057 'sc': 'srd',
4058 'sd': 'snd',
4059 'se': 'sme',
4060 'sg': 'sag',
4061 'si': 'sin',
4062 'sk': 'slk',
4063 'sl': 'slv',
4064 'sm': 'smo',
4065 'sn': 'sna',
4066 'so': 'som',
4067 'sq': 'sqi',
4068 'sr': 'srp',
4069 'ss': 'ssw',
4070 'st': 'sot',
4071 'su': 'sun',
4072 'sv': 'swe',
4073 'sw': 'swa',
4074 'ta': 'tam',
4075 'te': 'tel',
4076 'tg': 'tgk',
4077 'th': 'tha',
4078 'ti': 'tir',
4079 'tk': 'tuk',
4080 'tl': 'tgl',
4081 'tn': 'tsn',
4082 'to': 'ton',
4083 'tr': 'tur',
4084 'ts': 'tso',
4085 'tt': 'tat',
4086 'tw': 'twi',
4087 'ty': 'tah',
4088 'ug': 'uig',
4089 'uk': 'ukr',
4090 'ur': 'urd',
4091 'uz': 'uzb',
4092 've': 'ven',
4093 'vi': 'vie',
4094 'vo': 'vol',
4095 'wa': 'wln',
4096 'wo': 'wol',
4097 'xh': 'xho',
4098 'yi': 'yid',
4099 'ji': 'yid', # Replaced by yi in 1989 revision
4100 'yo': 'yor',
4101 'za': 'zha',
4102 'zh': 'zho',
4103 'zu': 'zul',
4104 }
4105
4106 @classmethod
4107 def short2long(cls, code):
4108 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4109 return cls._lang_map.get(code[:2])
4110
4111 @classmethod
4112 def long2short(cls, code):
4113 """Convert language code from ISO 639-2/T to ISO 639-1"""
4114 for short_name, long_name in cls._lang_map.items():
4115 if long_name == code:
4116 return short_name
4117
4118
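# Usage sketch for ISO639Utils (illustrative values):
#
#   ISO639Utils.short2long('en')     # ==> 'eng'
#   ISO639Utils.short2long('en-US')  # only the first two characters are used ==> 'eng'
#   ISO639Utils.long2short('deu')    # ==> 'de'
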
4119 class ISO3166Utils:
4120 # From http://data.okfn.org/data/core/country-list
4121 _country_map = {
4122 'AF': 'Afghanistan',
4123 'AX': 'Åland Islands',
4124 'AL': 'Albania',
4125 'DZ': 'Algeria',
4126 'AS': 'American Samoa',
4127 'AD': 'Andorra',
4128 'AO': 'Angola',
4129 'AI': 'Anguilla',
4130 'AQ': 'Antarctica',
4131 'AG': 'Antigua and Barbuda',
4132 'AR': 'Argentina',
4133 'AM': 'Armenia',
4134 'AW': 'Aruba',
4135 'AU': 'Australia',
4136 'AT': 'Austria',
4137 'AZ': 'Azerbaijan',
4138 'BS': 'Bahamas',
4139 'BH': 'Bahrain',
4140 'BD': 'Bangladesh',
4141 'BB': 'Barbados',
4142 'BY': 'Belarus',
4143 'BE': 'Belgium',
4144 'BZ': 'Belize',
4145 'BJ': 'Benin',
4146 'BM': 'Bermuda',
4147 'BT': 'Bhutan',
4148 'BO': 'Bolivia, Plurinational State of',
4149 'BQ': 'Bonaire, Sint Eustatius and Saba',
4150 'BA': 'Bosnia and Herzegovina',
4151 'BW': 'Botswana',
4152 'BV': 'Bouvet Island',
4153 'BR': 'Brazil',
4154 'IO': 'British Indian Ocean Territory',
4155 'BN': 'Brunei Darussalam',
4156 'BG': 'Bulgaria',
4157 'BF': 'Burkina Faso',
4158 'BI': 'Burundi',
4159 'KH': 'Cambodia',
4160 'CM': 'Cameroon',
4161 'CA': 'Canada',
4162 'CV': 'Cape Verde',
4163 'KY': 'Cayman Islands',
4164 'CF': 'Central African Republic',
4165 'TD': 'Chad',
4166 'CL': 'Chile',
4167 'CN': 'China',
4168 'CX': 'Christmas Island',
4169 'CC': 'Cocos (Keeling) Islands',
4170 'CO': 'Colombia',
4171 'KM': 'Comoros',
4172 'CG': 'Congo',
4173 'CD': 'Congo, the Democratic Republic of the',
4174 'CK': 'Cook Islands',
4175 'CR': 'Costa Rica',
4176 'CI': 'Côte d\'Ivoire',
4177 'HR': 'Croatia',
4178 'CU': 'Cuba',
4179 'CW': 'Curaçao',
4180 'CY': 'Cyprus',
4181 'CZ': 'Czech Republic',
4182 'DK': 'Denmark',
4183 'DJ': 'Djibouti',
4184 'DM': 'Dominica',
4185 'DO': 'Dominican Republic',
4186 'EC': 'Ecuador',
4187 'EG': 'Egypt',
4188 'SV': 'El Salvador',
4189 'GQ': 'Equatorial Guinea',
4190 'ER': 'Eritrea',
4191 'EE': 'Estonia',
4192 'ET': 'Ethiopia',
4193 'FK': 'Falkland Islands (Malvinas)',
4194 'FO': 'Faroe Islands',
4195 'FJ': 'Fiji',
4196 'FI': 'Finland',
4197 'FR': 'France',
4198 'GF': 'French Guiana',
4199 'PF': 'French Polynesia',
4200 'TF': 'French Southern Territories',
4201 'GA': 'Gabon',
4202 'GM': 'Gambia',
4203 'GE': 'Georgia',
4204 'DE': 'Germany',
4205 'GH': 'Ghana',
4206 'GI': 'Gibraltar',
4207 'GR': 'Greece',
4208 'GL': 'Greenland',
4209 'GD': 'Grenada',
4210 'GP': 'Guadeloupe',
4211 'GU': 'Guam',
4212 'GT': 'Guatemala',
4213 'GG': 'Guernsey',
4214 'GN': 'Guinea',
4215 'GW': 'Guinea-Bissau',
4216 'GY': 'Guyana',
4217 'HT': 'Haiti',
4218 'HM': 'Heard Island and McDonald Islands',
4219 'VA': 'Holy See (Vatican City State)',
4220 'HN': 'Honduras',
4221 'HK': 'Hong Kong',
4222 'HU': 'Hungary',
4223 'IS': 'Iceland',
4224 'IN': 'India',
4225 'ID': 'Indonesia',
4226 'IR': 'Iran, Islamic Republic of',
4227 'IQ': 'Iraq',
4228 'IE': 'Ireland',
4229 'IM': 'Isle of Man',
4230 'IL': 'Israel',
4231 'IT': 'Italy',
4232 'JM': 'Jamaica',
4233 'JP': 'Japan',
4234 'JE': 'Jersey',
4235 'JO': 'Jordan',
4236 'KZ': 'Kazakhstan',
4237 'KE': 'Kenya',
4238 'KI': 'Kiribati',
4239 'KP': 'Korea, Democratic People\'s Republic of',
4240 'KR': 'Korea, Republic of',
4241 'KW': 'Kuwait',
4242 'KG': 'Kyrgyzstan',
4243 'LA': 'Lao People\'s Democratic Republic',
4244 'LV': 'Latvia',
4245 'LB': 'Lebanon',
4246 'LS': 'Lesotho',
4247 'LR': 'Liberia',
4248 'LY': 'Libya',
4249 'LI': 'Liechtenstein',
4250 'LT': 'Lithuania',
4251 'LU': 'Luxembourg',
4252 'MO': 'Macao',
4253 'MK': 'Macedonia, the Former Yugoslav Republic of',
4254 'MG': 'Madagascar',
4255 'MW': 'Malawi',
4256 'MY': 'Malaysia',
4257 'MV': 'Maldives',
4258 'ML': 'Mali',
4259 'MT': 'Malta',
4260 'MH': 'Marshall Islands',
4261 'MQ': 'Martinique',
4262 'MR': 'Mauritania',
4263 'MU': 'Mauritius',
4264 'YT': 'Mayotte',
4265 'MX': 'Mexico',
4266 'FM': 'Micronesia, Federated States of',
4267 'MD': 'Moldova, Republic of',
4268 'MC': 'Monaco',
4269 'MN': 'Mongolia',
4270 'ME': 'Montenegro',
4271 'MS': 'Montserrat',
4272 'MA': 'Morocco',
4273 'MZ': 'Mozambique',
4274 'MM': 'Myanmar',
4275 'NA': 'Namibia',
4276 'NR': 'Nauru',
4277 'NP': 'Nepal',
4278 'NL': 'Netherlands',
4279 'NC': 'New Caledonia',
4280 'NZ': 'New Zealand',
4281 'NI': 'Nicaragua',
4282 'NE': 'Niger',
4283 'NG': 'Nigeria',
4284 'NU': 'Niue',
4285 'NF': 'Norfolk Island',
4286 'MP': 'Northern Mariana Islands',
4287 'NO': 'Norway',
4288 'OM': 'Oman',
4289 'PK': 'Pakistan',
4290 'PW': 'Palau',
4291 'PS': 'Palestine, State of',
4292 'PA': 'Panama',
4293 'PG': 'Papua New Guinea',
4294 'PY': 'Paraguay',
4295 'PE': 'Peru',
4296 'PH': 'Philippines',
4297 'PN': 'Pitcairn',
4298 'PL': 'Poland',
4299 'PT': 'Portugal',
4300 'PR': 'Puerto Rico',
4301 'QA': 'Qatar',
4302 'RE': 'Réunion',
4303 'RO': 'Romania',
4304 'RU': 'Russian Federation',
4305 'RW': 'Rwanda',
4306 'BL': 'Saint Barthélemy',
4307 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4308 'KN': 'Saint Kitts and Nevis',
4309 'LC': 'Saint Lucia',
4310 'MF': 'Saint Martin (French part)',
4311 'PM': 'Saint Pierre and Miquelon',
4312 'VC': 'Saint Vincent and the Grenadines',
4313 'WS': 'Samoa',
4314 'SM': 'San Marino',
4315 'ST': 'Sao Tome and Principe',
4316 'SA': 'Saudi Arabia',
4317 'SN': 'Senegal',
4318 'RS': 'Serbia',
4319 'SC': 'Seychelles',
4320 'SL': 'Sierra Leone',
4321 'SG': 'Singapore',
4322 'SX': 'Sint Maarten (Dutch part)',
4323 'SK': 'Slovakia',
4324 'SI': 'Slovenia',
4325 'SB': 'Solomon Islands',
4326 'SO': 'Somalia',
4327 'ZA': 'South Africa',
4328 'GS': 'South Georgia and the South Sandwich Islands',
4329 'SS': 'South Sudan',
4330 'ES': 'Spain',
4331 'LK': 'Sri Lanka',
4332 'SD': 'Sudan',
4333 'SR': 'Suriname',
4334 'SJ': 'Svalbard and Jan Mayen',
4335 'SZ': 'Swaziland',
4336 'SE': 'Sweden',
4337 'CH': 'Switzerland',
4338 'SY': 'Syrian Arab Republic',
4339 'TW': 'Taiwan, Province of China',
4340 'TJ': 'Tajikistan',
4341 'TZ': 'Tanzania, United Republic of',
4342 'TH': 'Thailand',
4343 'TL': 'Timor-Leste',
4344 'TG': 'Togo',
4345 'TK': 'Tokelau',
4346 'TO': 'Tonga',
4347 'TT': 'Trinidad and Tobago',
4348 'TN': 'Tunisia',
4349 'TR': 'Turkey',
4350 'TM': 'Turkmenistan',
4351 'TC': 'Turks and Caicos Islands',
4352 'TV': 'Tuvalu',
4353 'UG': 'Uganda',
4354 'UA': 'Ukraine',
4355 'AE': 'United Arab Emirates',
4356 'GB': 'United Kingdom',
4357 'US': 'United States',
4358 'UM': 'United States Minor Outlying Islands',
4359 'UY': 'Uruguay',
4360 'UZ': 'Uzbekistan',
4361 'VU': 'Vanuatu',
4362 'VE': 'Venezuela, Bolivarian Republic of',
4363 'VN': 'Viet Nam',
4364 'VG': 'Virgin Islands, British',
4365 'VI': 'Virgin Islands, U.S.',
4366 'WF': 'Wallis and Futuna',
4367 'EH': 'Western Sahara',
4368 'YE': 'Yemen',
4369 'ZM': 'Zambia',
4370 'ZW': 'Zimbabwe',
4371 # Not ISO 3166 codes, but used for IP blocks
4372 'AP': 'Asia/Pacific Region',
4373 'EU': 'Europe',
4374 }
4375
4376 @classmethod
4377 def short2full(cls, code):
4378 """Convert an ISO 3166-2 country code to the corresponding full name"""
4379 return cls._country_map.get(code.upper())
4380
4381
4382 class GeoUtils:
4383 # Major IPv4 address blocks per country
4384 _country_ip_map = {
4385 'AD': '46.172.224.0/19',
4386 'AE': '94.200.0.0/13',
4387 'AF': '149.54.0.0/17',
4388 'AG': '209.59.64.0/18',
4389 'AI': '204.14.248.0/21',
4390 'AL': '46.99.0.0/16',
4391 'AM': '46.70.0.0/15',
4392 'AO': '105.168.0.0/13',
4393 'AP': '182.50.184.0/21',
4394 'AQ': '23.154.160.0/24',
4395 'AR': '181.0.0.0/12',
4396 'AS': '202.70.112.0/20',
4397 'AT': '77.116.0.0/14',
4398 'AU': '1.128.0.0/11',
4399 'AW': '181.41.0.0/18',
4400 'AX': '185.217.4.0/22',
4401 'AZ': '5.197.0.0/16',
4402 'BA': '31.176.128.0/17',
4403 'BB': '65.48.128.0/17',
4404 'BD': '114.130.0.0/16',
4405 'BE': '57.0.0.0/8',
4406 'BF': '102.178.0.0/15',
4407 'BG': '95.42.0.0/15',
4408 'BH': '37.131.0.0/17',
4409 'BI': '154.117.192.0/18',
4410 'BJ': '137.255.0.0/16',
4411 'BL': '185.212.72.0/23',
4412 'BM': '196.12.64.0/18',
4413 'BN': '156.31.0.0/16',
4414 'BO': '161.56.0.0/16',
4415 'BQ': '161.0.80.0/20',
4416 'BR': '191.128.0.0/12',
4417 'BS': '24.51.64.0/18',
4418 'BT': '119.2.96.0/19',
4419 'BW': '168.167.0.0/16',
4420 'BY': '178.120.0.0/13',
4421 'BZ': '179.42.192.0/18',
4422 'CA': '99.224.0.0/11',
4423 'CD': '41.243.0.0/16',
4424 'CF': '197.242.176.0/21',
4425 'CG': '160.113.0.0/16',
4426 'CH': '85.0.0.0/13',
4427 'CI': '102.136.0.0/14',
4428 'CK': '202.65.32.0/19',
4429 'CL': '152.172.0.0/14',
4430 'CM': '102.244.0.0/14',
4431 'CN': '36.128.0.0/10',
4432 'CO': '181.240.0.0/12',
4433 'CR': '201.192.0.0/12',
4434 'CU': '152.206.0.0/15',
4435 'CV': '165.90.96.0/19',
4436 'CW': '190.88.128.0/17',
4437 'CY': '31.153.0.0/16',
4438 'CZ': '88.100.0.0/14',
4439 'DE': '53.0.0.0/8',
4440 'DJ': '197.241.0.0/17',
4441 'DK': '87.48.0.0/12',
4442 'DM': '192.243.48.0/20',
4443 'DO': '152.166.0.0/15',
4444 'DZ': '41.96.0.0/12',
4445 'EC': '186.68.0.0/15',
4446 'EE': '90.190.0.0/15',
4447 'EG': '156.160.0.0/11',
4448 'ER': '196.200.96.0/20',
4449 'ES': '88.0.0.0/11',
4450 'ET': '196.188.0.0/14',
4451 'EU': '2.16.0.0/13',
4452 'FI': '91.152.0.0/13',
4453 'FJ': '144.120.0.0/16',
4454 'FK': '80.73.208.0/21',
4455 'FM': '119.252.112.0/20',
4456 'FO': '88.85.32.0/19',
4457 'FR': '90.0.0.0/9',
4458 'GA': '41.158.0.0/15',
4459 'GB': '25.0.0.0/8',
4460 'GD': '74.122.88.0/21',
4461 'GE': '31.146.0.0/16',
4462 'GF': '161.22.64.0/18',
4463 'GG': '62.68.160.0/19',
4464 'GH': '154.160.0.0/12',
4465 'GI': '95.164.0.0/16',
4466 'GL': '88.83.0.0/19',
4467 'GM': '160.182.0.0/15',
4468 'GN': '197.149.192.0/18',
4469 'GP': '104.250.0.0/19',
4470 'GQ': '105.235.224.0/20',
4471 'GR': '94.64.0.0/13',
4472 'GT': '168.234.0.0/16',
4473 'GU': '168.123.0.0/16',
4474 'GW': '197.214.80.0/20',
4475 'GY': '181.41.64.0/18',
4476 'HK': '113.252.0.0/14',
4477 'HN': '181.210.0.0/16',
4478 'HR': '93.136.0.0/13',
4479 'HT': '148.102.128.0/17',
4480 'HU': '84.0.0.0/14',
4481 'ID': '39.192.0.0/10',
4482 'IE': '87.32.0.0/12',
4483 'IL': '79.176.0.0/13',
4484 'IM': '5.62.80.0/20',
4485 'IN': '117.192.0.0/10',
4486 'IO': '203.83.48.0/21',
4487 'IQ': '37.236.0.0/14',
4488 'IR': '2.176.0.0/12',
4489 'IS': '82.221.0.0/16',
4490 'IT': '79.0.0.0/10',
4491 'JE': '87.244.64.0/18',
4492 'JM': '72.27.0.0/17',
4493 'JO': '176.29.0.0/16',
4494 'JP': '133.0.0.0/8',
4495 'KE': '105.48.0.0/12',
4496 'KG': '158.181.128.0/17',
4497 'KH': '36.37.128.0/17',
4498 'KI': '103.25.140.0/22',
4499 'KM': '197.255.224.0/20',
4500 'KN': '198.167.192.0/19',
4501 'KP': '175.45.176.0/22',
4502 'KR': '175.192.0.0/10',
4503 'KW': '37.36.0.0/14',
4504 'KY': '64.96.0.0/15',
4505 'KZ': '2.72.0.0/13',
4506 'LA': '115.84.64.0/18',
4507 'LB': '178.135.0.0/16',
4508 'LC': '24.92.144.0/20',
4509 'LI': '82.117.0.0/19',
4510 'LK': '112.134.0.0/15',
4511 'LR': '102.183.0.0/16',
4512 'LS': '129.232.0.0/17',
4513 'LT': '78.56.0.0/13',
4514 'LU': '188.42.0.0/16',
4515 'LV': '46.109.0.0/16',
4516 'LY': '41.252.0.0/14',
4517 'MA': '105.128.0.0/11',
4518 'MC': '88.209.64.0/18',
4519 'MD': '37.246.0.0/16',
4520 'ME': '178.175.0.0/17',
4521 'MF': '74.112.232.0/21',
4522 'MG': '154.126.0.0/17',
4523 'MH': '117.103.88.0/21',
4524 'MK': '77.28.0.0/15',
4525 'ML': '154.118.128.0/18',
4526 'MM': '37.111.0.0/17',
4527 'MN': '49.0.128.0/17',
4528 'MO': '60.246.0.0/16',
4529 'MP': '202.88.64.0/20',
4530 'MQ': '109.203.224.0/19',
4531 'MR': '41.188.64.0/18',
4532 'MS': '208.90.112.0/22',
4533 'MT': '46.11.0.0/16',
4534 'MU': '105.16.0.0/12',
4535 'MV': '27.114.128.0/18',
4536 'MW': '102.70.0.0/15',
4537 'MX': '187.192.0.0/11',
4538 'MY': '175.136.0.0/13',
4539 'MZ': '197.218.0.0/15',
4540 'NA': '41.182.0.0/16',
4541 'NC': '101.101.0.0/18',
4542 'NE': '197.214.0.0/18',
4543 'NF': '203.17.240.0/22',
4544 'NG': '105.112.0.0/12',
4545 'NI': '186.76.0.0/15',
4546 'NL': '145.96.0.0/11',
4547 'NO': '84.208.0.0/13',
4548 'NP': '36.252.0.0/15',
4549 'NR': '203.98.224.0/19',
4550 'NU': '49.156.48.0/22',
4551 'NZ': '49.224.0.0/14',
4552 'OM': '5.36.0.0/15',
4553 'PA': '186.72.0.0/15',
4554 'PE': '186.160.0.0/14',
4555 'PF': '123.50.64.0/18',
4556 'PG': '124.240.192.0/19',
4557 'PH': '49.144.0.0/13',
4558 'PK': '39.32.0.0/11',
4559 'PL': '83.0.0.0/11',
4560 'PM': '70.36.0.0/20',
4561 'PR': '66.50.0.0/16',
4562 'PS': '188.161.0.0/16',
4563 'PT': '85.240.0.0/13',
4564 'PW': '202.124.224.0/20',
4565 'PY': '181.120.0.0/14',
4566 'QA': '37.210.0.0/15',
4567 'RE': '102.35.0.0/16',
4568 'RO': '79.112.0.0/13',
4569 'RS': '93.86.0.0/15',
4570 'RU': '5.136.0.0/13',
4571 'RW': '41.186.0.0/16',
4572 'SA': '188.48.0.0/13',
4573 'SB': '202.1.160.0/19',
4574 'SC': '154.192.0.0/11',
4575 'SD': '102.120.0.0/13',
4576 'SE': '78.64.0.0/12',
4577 'SG': '8.128.0.0/10',
4578 'SI': '188.196.0.0/14',
4579 'SK': '78.98.0.0/15',
4580 'SL': '102.143.0.0/17',
4581 'SM': '89.186.32.0/19',
4582 'SN': '41.82.0.0/15',
4583 'SO': '154.115.192.0/18',
4584 'SR': '186.179.128.0/17',
4585 'SS': '105.235.208.0/21',
4586 'ST': '197.159.160.0/19',
4587 'SV': '168.243.0.0/16',
4588 'SX': '190.102.0.0/20',
4589 'SY': '5.0.0.0/16',
4590 'SZ': '41.84.224.0/19',
4591 'TC': '65.255.48.0/20',
4592 'TD': '154.68.128.0/19',
4593 'TG': '196.168.0.0/14',
4594 'TH': '171.96.0.0/13',
4595 'TJ': '85.9.128.0/18',
4596 'TK': '27.96.24.0/21',
4597 'TL': '180.189.160.0/20',
4598 'TM': '95.85.96.0/19',
4599 'TN': '197.0.0.0/11',
4600 'TO': '175.176.144.0/21',
4601 'TR': '78.160.0.0/11',
4602 'TT': '186.44.0.0/15',
4603 'TV': '202.2.96.0/19',
4604 'TW': '120.96.0.0/11',
4605 'TZ': '156.156.0.0/14',
4606 'UA': '37.52.0.0/14',
4607 'UG': '102.80.0.0/13',
4608 'US': '6.0.0.0/8',
4609 'UY': '167.56.0.0/13',
4610 'UZ': '84.54.64.0/18',
4611 'VA': '212.77.0.0/19',
4612 'VC': '207.191.240.0/21',
4613 'VE': '186.88.0.0/13',
4614 'VG': '66.81.192.0/20',
4615 'VI': '146.226.0.0/16',
4616 'VN': '14.160.0.0/11',
4617 'VU': '202.80.32.0/20',
4618 'WF': '117.20.32.0/21',
4619 'WS': '202.4.32.0/19',
4620 'YE': '134.35.0.0/16',
4621 'YT': '41.242.116.0/22',
4622 'ZA': '41.0.0.0/11',
4623 'ZM': '102.144.0.0/13',
4624 'ZW': '102.177.192.0/18',
4625 }
4626
4627 @classmethod
4628 def random_ipv4(cls, code_or_block):
4629 if len(code_or_block) == 2:
4630 block = cls._country_ip_map.get(code_or_block.upper())
4631 if not block:
4632 return None
4633 else:
4634 block = code_or_block
4635 addr, preflen = block.split('/')
4636 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4637 addr_max = addr_min | (0xffffffff >> int(preflen))
4638 return compat_str(socket.inet_ntoa(
4639 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4640
4641
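# Usage sketch for GeoUtils.random_ipv4 (results are random per call; the
# addresses below only illustrate the shape of the output):
#
#   GeoUtils.random_ipv4('DE')              # e.g. '53.17.203.9', inside 53.0.0.0/8
#   GeoUtils.random_ipv4('192.168.0.0/24')  # e.g. '192.168.0.42'
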
4642 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4643 def __init__(self, proxies=None):
4644 # Set default handlers
4645 for type in ('http', 'https'):
4646 setattr(self, '%s_open' % type,
4647 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4648 meth(r, proxy, type))
4649 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4650
4651 def proxy_open(self, req, proxy, type):
4652 req_proxy = req.headers.get('Ytdl-request-proxy')
4653 if req_proxy is not None:
4654 proxy = req_proxy
4655 del req.headers['Ytdl-request-proxy']
4656
4657 if proxy == '__noproxy__':
4658 return None # No Proxy
4659 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4660 req.add_header('Ytdl-socks-proxy', proxy)
4661 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
4662 return None
4663 return compat_urllib_request.ProxyHandler.proxy_open(
4664 self, req, proxy, type)
4665
4666
4667 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4668 # released into Public Domain
4669 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4670
4671 def long_to_bytes(n, blocksize=0):
4672 """long_to_bytes(n:long, blocksize:int) : string
4673 Convert a long integer to a byte string.
4674
4675 If optional blocksize is given and greater than zero, pad the front of the
4676 byte string with binary zeros so that the length is a multiple of
4677 blocksize.
4678 """
4679 # after much testing, this algorithm was deemed to be the fastest
4680 s = b''
4681 n = int(n)
4682 while n > 0:
4683 s = compat_struct_pack('>I', n & 0xffffffff) + s
4684 n = n >> 32
4685 # strip off leading zeros
4686 for i in range(len(s)):
4687 if s[i] != b'\000'[0]:
4688 break
4689 else:
4690 # only happens when n == 0
4691 s = b'\000'
4692 i = 0
4693 s = s[i:]
4694 # add back some pad bytes. this could be done more efficiently w.r.t. the
4695 # de-padding being done above, but sigh...
4696 if blocksize > 0 and len(s) % blocksize:
4697 s = (blocksize - len(s) % blocksize) * b'\000' + s
4698 return s
4699
4700
4701 def bytes_to_long(s):
4702 """bytes_to_long(string) : long
4703 Convert a byte string to a long integer.
4704
4705 This is (essentially) the inverse of long_to_bytes().
4706 """
4707 acc = 0
4708 length = len(s)
4709 if length % 4:
4710 extra = (4 - length % 4)
4711 s = b'\000' * extra + s
4712 length = length + extra
4713 for i in range(0, length, 4):
4714 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4715 return acc
4716
4717
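# Round-trip sketch for long_to_bytes/bytes_to_long (illustrative values):
#
#   bytes_to_long(b'\x01\x00')     # ==> 256
#   long_to_bytes(256)             # ==> b'\x01\x00'
#   long_to_bytes(1, blocksize=4)  # ==> b'\x00\x00\x00\x01'
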
4718 def ohdave_rsa_encrypt(data, exponent, modulus):
4719 '''
4720 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4721
4722 Input:
4723 data: data to encrypt, bytes-like object
4724 exponent, modulus: parameter e and N of RSA algorithm, both integer
4725 Output: hex string of encrypted data
4726
4727 Limitation: supports one block encryption only
4728 '''
4729
4730 payload = int(binascii.hexlify(data[::-1]), 16)
4731 encrypted = pow(payload, exponent, modulus)
4732 return '%x' % encrypted
4733
4734
4735 def pkcs1pad(data, length):
4736 """
4737 Padding input data with PKCS#1 scheme
4738
4739 @param {int[]} data input data
4740 @param {int} length target length
4741 @returns {int[]} padded data
4742 """
4743 if len(data) > length - 11:
4744 raise ValueError('Input data too long for PKCS#1 padding')
4745
4746 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 padding octets must be nonzero
4747 return [0, 2] + pseudo_random + [0] + data
4748
4749
4750 def encode_base_n(num, n, table=None):
4751 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4752 if not table:
4753 table = FULL_TABLE[:n]
4754
4755 if n > len(table):
4756 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4757
4758 if num == 0:
4759 return table[0]
4760
4761 ret = ''
4762 while num:
4763 ret = table[num % n] + ret
4764 num = num // n
4765 return ret
4766
4767
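# Usage sketch for encode_base_n (illustrative values; decode_base, defined
# further below, is the inverse for an explicit digit table):
#
#   encode_base_n(255, 16)                 # ==> 'ff'
#   encode_base_n(0, 2)                    # ==> '0'
#   decode_base('ff', '0123456789abcdef')  # ==> 255
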
4768 def decode_packed_codes(code):
4769 mobj = re.search(PACKED_CODES_RE, code)
4770 obfuscated_code, base, count, symbols = mobj.groups()
4771 base = int(base)
4772 count = int(count)
4773 symbols = symbols.split('|')
4774 symbol_table = {}
4775
4776 while count:
4777 count -= 1
4778 base_n_count = encode_base_n(count, base)
4779 symbol_table[base_n_count] = symbols[count] or base_n_count
4780
4781 return re.sub(
4782 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4783 obfuscated_code)
4784
4785
4786 def caesar(s, alphabet, shift):
4787 if shift == 0:
4788 return s
4789 l = len(alphabet)
4790 return ''.join(
4791 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4792 for c in s)
4793
4794
4795 def rot47(s):
4796 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4797
4798
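# Usage sketch for caesar/rot47 (illustrative values; rot47 shifts by half of
# the 94-character printable-ASCII alphabet, so applying it twice restores
# the input):
#
#   caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 1)  # ==> 'bcd'
#   rot47('Hello')                                  # ==> 'w6==@'
#   rot47(rot47('Hello'))                           # ==> 'Hello'
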
4799 def parse_m3u8_attributes(attrib):
4800 info = {}
4801 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4802 if val.startswith('"'):
4803 val = val[1:-1]
4804 info[key] = val
4805 return info
4806
4807
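# Usage sketch for parse_m3u8_attributes (illustrative attribute list from an
# EXT-X-STREAM-INF line; note the quoted value may itself contain commas):
#
#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.42e00a,mp4a.40.2"')
#   # ==> {'BANDWIDTH': '1280000', 'CODECS': 'avc1.42e00a,mp4a.40.2'}
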
4808 def urshift(val, n):
4809 return val >> n if val >= 0 else (val + 0x100000000) >> n
4810
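# urshift emulates JavaScript's unsigned right shift (>>>) for 32-bit values:
#
#   urshift(-1, 4)  # ==> 268435455 (0x0fffffff)
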
4811
4812 # Based on png2str() written by @gdkchan and improved by @yokrysty
4813 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4814 def decode_png(png_data):
4815 # Reference: https://www.w3.org/TR/PNG/
4816 header = png_data[8:]
4817
4818 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4819 raise OSError('Not a valid PNG file.')
4820
4821 int_map = {1: '>B', 2: '>H', 4: '>I'}
4822 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4823
4824 chunks = []
4825
4826 while header:
4827 length = unpack_integer(header[:4])
4828 header = header[4:]
4829
4830 chunk_type = header[:4]
4831 header = header[4:]
4832
4833 chunk_data = header[:length]
4834 header = header[length:]
4835
4836 header = header[4:] # Skip CRC
4837
4838 chunks.append({
4839 'type': chunk_type,
4840 'length': length,
4841 'data': chunk_data
4842 })
4843
4844 ihdr = chunks[0]['data']
4845
4846 width = unpack_integer(ihdr[:4])
4847 height = unpack_integer(ihdr[4:8])
4848
4849 idat = b''
4850
4851 for chunk in chunks:
4852 if chunk['type'] == b'IDAT':
4853 idat += chunk['data']
4854
4855 if not idat:
4856 raise OSError('Unable to read PNG data.')
4857
4858 decompressed_data = bytearray(zlib.decompress(idat))
4859
4860 stride = width * 3
4861 pixels = []
4862
4863 def _get_pixel(idx):
4864 x = idx % stride
4865 y = idx // stride
4866 return pixels[y][x]
4867
4868 for y in range(height):
4869 basePos = y * (1 + stride)
4870 filter_type = decompressed_data[basePos]
4871
4872 current_row = []
4873
4874 pixels.append(current_row)
4875
4876 for x in range(stride):
4877 color = decompressed_data[1 + basePos + x]
4878 basex = y * stride + x
4879 left = 0
4880 up = 0
4881
4882 if x > 2:
4883 left = _get_pixel(basex - 3)
4884 if y > 0:
4885 up = _get_pixel(basex - stride)
4886
4887 if filter_type == 1: # Sub
4888 color = (color + left) & 0xff
4889 elif filter_type == 2: # Up
4890 color = (color + up) & 0xff
4891 elif filter_type == 3: # Average
4892 color = (color + ((left + up) >> 1)) & 0xff
4893 elif filter_type == 4: # Paeth
4894 a = left
4895 b = up
4896 c = 0
4897
4898 if x > 2 and y > 0:
4899 c = _get_pixel(basex - stride - 3)
4900
4901 p = a + b - c
4902
4903 pa = abs(p - a)
4904 pb = abs(p - b)
4905 pc = abs(p - c)
4906
4907 if pa <= pb and pa <= pc:
4908 color = (color + a) & 0xff
4909 elif pb <= pc:
4910 color = (color + b) & 0xff
4911 else:
4912 color = (color + c) & 0xff
4913
4914 current_row.append(color)
4915
4916 return width, height, pixels
4917
4918
4919 def write_xattr(path, key, value):
4920 # Windows: Write xattrs to NTFS Alternate Data Streams:
4921 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4922 if compat_os_name == 'nt':
4923 assert ':' not in key
4924 assert os.path.exists(path)
4925
4926 try:
4927 with open(f'{path}:{key}', 'wb') as f:
4928 f.write(value)
4929 except OSError as e:
4930 raise XAttrMetadataError(e.errno, e.strerror)
4931 return
4932
4933 # UNIX Method 1. Use the xattr/pyxattr modules
4934 from .dependencies import xattr
4935
4936 setxattr = None
4937 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
4938 # Unicode arguments are not supported in pyxattr until version 0.5.0
4939 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4940 if version_tuple(xattr.__version__) >= (0, 5, 0):
4941 setxattr = xattr.set
4942 elif xattr:
4943 setxattr = xattr.setxattr
4944
4945 if setxattr:
4946 try:
4947 setxattr(path, key, value)
4948 except OSError as e:
4949 raise XAttrMetadataError(e.errno, e.strerror)
4950 return
4951
4952 # UNIX Method 2. Use setfattr/xattr executables
4953 exe = ('setfattr' if check_executable('setfattr', ['--version'])
4954 else 'xattr' if check_executable('xattr', ['-h']) else None)
4955 if not exe:
4956 raise XAttrUnavailableError(
4957 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
4958 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4959
4960 value = value.decode()
4961 try:
4962 _, stderr, returncode = Popen.run(
4963 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
4964 stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4965 except OSError as e:
4966 raise XAttrMetadataError(e.errno, e.strerror)
4967 if returncode:
4968 raise XAttrMetadataError(returncode, stderr)
4969
4970
4971 def random_birthday(year_field, month_field, day_field):
4972 start_date = datetime.date(1950, 1, 1)
4973 end_date = datetime.date(1995, 12, 31)
4974 offset = random.randint(0, (end_date - start_date).days)
4975 random_date = start_date + datetime.timedelta(offset)
4976 return {
4977 year_field: str(random_date.year),
4978 month_field: str(random_date.month),
4979 day_field: str(random_date.day),
4980 }
4981
4982
4983 # Templates for internet shortcut files, which are plain text files.
4984 DOT_URL_LINK_TEMPLATE = '''\
4985 [InternetShortcut]
4986 URL=%(url)s
4987 '''
4988
4989 DOT_WEBLOC_LINK_TEMPLATE = '''\
4990 <?xml version="1.0" encoding="UTF-8"?>
4991 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4992 <plist version="1.0">
4993 <dict>
4994 \t<key>URL</key>
4995 \t<string>%(url)s</string>
4996 </dict>
4997 </plist>
4998 '''
4999
5000 DOT_DESKTOP_LINK_TEMPLATE = '''\
5001 [Desktop Entry]
5002 Encoding=UTF-8
5003 Name=%(filename)s
5004 Type=Link
5005 URL=%(url)s
5006 Icon=text-html
5007 '''
5008
5009 LINK_TEMPLATES = {
5010 'url': DOT_URL_LINK_TEMPLATE,
5011 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5012 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5013 }
5014
5015
5016 def iri_to_uri(iri):
5017 """
5018 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5019
5020 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5021 """
5022
5023 iri_parts = compat_urllib_parse_urlparse(iri)
5024
5025 if '[' in iri_parts.netloc:
5026 raise ValueError('IPv6 URIs are not yet supported.')
5027 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5028
5029 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5030
5031 net_location = ''
5032 if iri_parts.username:
5033 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5034 if iri_parts.password is not None:
5035 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5036 net_location += '@'
5037
5038 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5039 # The 'idna' encoding produces ASCII text.
5040 if iri_parts.port is not None and iri_parts.port != 80:  # NB: 80 is assumed to be the default port for every scheme
5041 net_location += ':' + str(iri_parts.port)
5042
5043 return urllib.parse.urlunparse(
5044 (iri_parts.scheme,
5045 net_location,
5046
5047 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5048
5049 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5050 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5051
5052 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5053 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5054
5055 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5056
5057 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5058
5059
5060 def to_high_limit_path(path):
5061 if sys.platform in ['win32', 'cygwin']:
5062 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5063 return '\\\\?\\' + os.path.abspath(path)
5064
5065 return path
5066
5067
5068 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=None):
5069 val = traverse_obj(obj, *variadic(field))
5070 if (not val and val != 0) if ignore is NO_DEFAULT else val in ignore:
5071 return default
5072 return template % (func(val) if func else val)
5073
5074
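# Usage sketch for format_field (illustrative values):
#
#   format_field({'height': 1080}, 'height', '%sp')       # ==> '1080p'
#   format_field({}, 'height', '%sp', default='unknown')  # ==> 'unknown'
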
5075 def clean_podcast_url(url):
5076 return re.sub(r'''(?x)
5077 (?:
5078 (?:
5079 chtbl\.com/track|
5080 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5081 play\.podtrac\.com
5082 )/[^/]+|
5083 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5084 flex\.acast\.com|
5085 pd(?:
5086 cn\.co| # https://podcorn.com/analytics-prefix/
5087 st\.fm # https://podsights.com/docs/
5088 )/e
5089 )/''', '', url)
5090
5091
5092 _HEX_TABLE = '0123456789abcdef'
5093
5094
5095 def random_uuidv4():
5096 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5097
5098
5099 def make_dir(path, to_screen=None):
5100 try:
5101 dn = os.path.dirname(path)
5102 if dn and not os.path.exists(dn):
5103 os.makedirs(dn)
5104 return True
5105 except OSError as err:
5106 if callable(to_screen):  # `callable()` returns a bool, so comparing it to None was always true
5107 to_screen('unable to create directory ' + error_to_compat_str(err))
5108 return False
5109
5110
5111 def get_executable_path():
5112 from .update import _get_variant_and_executable_path
5113
5114 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5115
5116
5117 def load_plugins(name, suffix, namespace):
5118 classes = {}
5119 with contextlib.suppress(FileNotFoundError):
5120 plugins_spec = importlib.util.spec_from_file_location(
5121 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5122 plugins = importlib.util.module_from_spec(plugins_spec)
5123 sys.modules[plugins_spec.name] = plugins
5124 plugins_spec.loader.exec_module(plugins)
5125 for name in dir(plugins):
5126 if name in namespace:
5127 continue
5128 if not name.endswith(suffix):
5129 continue
5130 klass = getattr(plugins, name)
5131 classes[name] = namespace[name] = klass
5132 return classes
5133
5134
5135 def traverse_obj(
5136 obj, *path_list, default=None, expected_type=None, get_all=True,
5137 casesense=True, is_user_input=False, traverse_string=False):
5138 ''' Traverse nested list/dict/tuple
5139 @param path_list A list of paths which are checked one by one.
5140 Each path is a list of keys where each key is a:
5141 - None: Do nothing
5142 - string: A dictionary key
5143 - int: An index into a list
5144 - tuple: A list of keys all of which will be traversed
5145 - Ellipsis: Fetch all values in the object
5146 - Function: Takes the key and value as arguments
5147 and returns whether the key matches or not
5148 @param default Default value to return
5149 @param expected_type Only accept final value of this type (Can also be any callable)
5150 @param get_all Return all the values obtained from a path or only the first one
5151 @param casesense Whether to consider dictionary keys as case sensitive
5152 @param is_user_input Whether the keys are generated from user input. If True,
5153 strings are converted to int/slice if necessary
5154 @param traverse_string Whether to traverse inside strings. If True, any
5155 non-compatible object will also be converted into a string
5156 # TODO: Write tests
5157 '''
5158 if not casesense:
5159 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
5160 path_list = (map(_lower, variadic(path)) for path in path_list)
5161
5162 def _traverse_obj(obj, path, _current_depth=0):
5163 nonlocal depth
5164 path = tuple(variadic(path))
5165 for i, key in enumerate(path):
5166 if None in (key, obj):
5167 return obj
5168 if isinstance(key, (list, tuple)):
5169 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5170 key = ...
5171 if key is ...:
5172 obj = (obj.values() if isinstance(obj, dict)
5173 else obj if isinstance(obj, (list, tuple, LazyList))
5174 else str(obj) if traverse_string else [])
5175 _current_depth += 1
5176 depth = max(depth, _current_depth)
5177 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5178 elif callable(key):
5179 if isinstance(obj, (list, tuple, LazyList)):
5180 obj = enumerate(obj)
5181 elif isinstance(obj, dict):
5182 obj = obj.items()
5183 else:
5184 if not traverse_string:
5185 return None
5186 obj = str(obj)
5187 _current_depth += 1
5188 depth = max(depth, _current_depth)
5189 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5190 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5191 obj = (obj.get(key) if casesense or (key in obj)
5192 else next((v for k, v in obj.items() if _lower(k) == key), None))
5193 else:
5194 if is_user_input:
5195 key = (int_or_none(key) if ':' not in key
5196 else slice(*map(int_or_none, key.split(':'))))
5197 if key == slice(None):
5198 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5199 if not isinstance(key, (int, slice)):
5200 return None
5201 if not isinstance(obj, (list, tuple, LazyList)):
5202 if not traverse_string:
5203 return None
5204 obj = str(obj)
5205 try:
5206 obj = obj[key]
5207 except IndexError:
5208 return None
5209 return obj
5210
5211 if isinstance(expected_type, type):
5212 type_test = lambda val: val if isinstance(val, expected_type) else None
5213 elif expected_type is not None:
5214 type_test = expected_type
5215 else:
5216 type_test = lambda val: val
5217
5218 for path in path_list:
5219 depth = 0
5220 val = _traverse_obj(obj, path)
5221 if val is not None:
5222 if depth:
5223 for _ in range(depth - 1):
5224 val = itertools.chain.from_iterable(v for v in val if v is not None)
5225 val = [v for v in map(type_test, val) if v is not None]
5226 if val:
5227 return val if get_all else val[0]
5228 else:
5229 val = type_test(val)
5230 if val is not None:
5231 return val
5232 return default
5233
5234
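# A small, self-contained illustration of traverse_obj paths (illustrative
# values only; nothing in the module calls this):
def _traverse_obj_examples():
    d = {'a': [{'b': 1}, {'b': 2}]}
    assert traverse_obj(d, ('a', 0, 'b')) == 1  # plain dict/list indexing
    assert traverse_obj(d, ('a', ..., 'b')) == [1, 2]  # Ellipsis fans out over the list
    assert traverse_obj(d, 'missing', ('a', 1, 'b')) == 2  # paths are tried in order

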
5235 def traverse_dict(dictn, keys, casesense=True):
5236 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5237 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5238 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5239
5240
5241 def get_first(obj, keys, **kwargs):
5242 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5243
5244
5245 def variadic(x, allowed_types=(str, bytes, dict)):
5246 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5247
5248
5249 def decode_base(value, digits):
5250 # Convert the given base-n string to an integer
5251 table = {char: index for index, char in enumerate(digits)}
5252 result = 0
5253 base = len(digits)
5254 for char in value:  # renamed from `chr` to avoid shadowing the built-in
5255 result *= base
5256 result += table[char]
5257 return result
5258
5259
5260 def time_seconds(**kwargs):
5261 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5262 return t.timestamp()
5263
5264
5265 # Create a JSON Web Signature (JWS) with the HS256 algorithm;
5266 # the resulting format is JWS Compact Serialization.
5267 # Implemented following JWT (https://www.rfc-editor.org/rfc/rfc7519.html)
5268 # and JWS (https://www.rfc-editor.org/rfc/rfc7515.html)
5269 def jwt_encode_hs256(payload_data, key, headers={}):
5270 header_data = {
5271 'alg': 'HS256',
5272 'typ': 'JWT',
5273 }
5274 if headers:
5275 header_data.update(headers)
5276 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5277 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5278 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5279 signature_b64 = base64.b64encode(h.digest())
5280 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5281 return token
5282
5283
5284 # Can be extended in the future to verify the signature and parse the header, returning the algorithm used if it's not HS256
5285 def jwt_decode_hs256(jwt):
5286 header_b64, payload_b64, signature_b64 = jwt.split('.')
5287 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5288 return payload_data
5289
5290
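# Usage sketch for the JWT helpers (illustrative values; note that decoding
# does NOT verify the signature):
#
#   token = jwt_encode_hs256({'user': 'test'}, 'secret-key')
#   jwt_decode_hs256(token.decode())  # ==> {'user': 'test'}
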
5291 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5292
5293
5294 @functools.cache
5295 def supports_terminal_sequences(stream):
5296 if compat_os_name == 'nt':
5297 if not WINDOWS_VT_MODE:
5298 return False
5299 elif not os.getenv('TERM'):
5300 return False
5301 try:
5302 return stream.isatty()
5303 except BaseException:
5304 return False
5305
5306
5307 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5308 if get_windows_version() < (10, 0, 10586):
5309 return
5310 global WINDOWS_VT_MODE
5311 try:
5312 Popen.run('', shell=True)
5313 except Exception:
5314 return
5315
5316 WINDOWS_VT_MODE = True
5317 supports_terminal_sequences.cache_clear()
5318
5319
5320 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5321
5322
5323 def remove_terminal_sequences(string):
5324 return _terminal_sequences_re.sub('', string)
5325
5326
5327 def number_of_digits(number):
5328 return len('%d' % number)
5329
5330
5331 def join_nonempty(*values, delim='-', from_dict=None):
5332 if from_dict is not None:
5333 values = map(from_dict.get, values)
5334 return delim.join(map(str, filter(None, values)))
5335
5336
5337 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5338 """
5339 Find the largest format dimensions in terms of video width and, for each thumbnail:
5340 * Modify the URL: Match the width with the provided regex and replace it with the largest format's width
5341 * Update dimensions
5342
5343 This function is useful with video services that scale the provided thumbnails on demand
5344 """
5345 _keys = ('width', 'height')
5346 max_dimensions = max(
5347 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5348 default=(0, 0))
5349 if not max_dimensions[0]:
5350 return thumbnails
5351 return [
5352 merge_dicts(
5353 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5354 dict(zip(_keys, max_dimensions)), thumbnail)
5355 for thumbnail in thumbnails
5356 ]
5357
5358
5359 def parse_http_range(range):
5360 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5361 if not range:
5362 return None, None, None
5363 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5364 if not crg:
5365 return None, None, None
5366 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5367
5368
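# Usage sketch for parse_http_range (illustrative values):
#
#   parse_http_range('bytes=10-20/30')  # ==> (10, 20, 30)
#   parse_http_range('bytes 10-/30')    # ==> (10, None, 30)
#   parse_http_range(None)              # ==> (None, None, None)
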
5369 def read_stdin(what):
5370 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5371 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5372 return sys.stdin
5373
5374
5375 class Config:
5376 own_args = None
5377 parsed_args = None
5378 filename = None
5379 __initialized = False
5380
5381 def __init__(self, parser, label=None):
5382 self.parser, self.label = parser, label
5383 self._loaded_paths, self.configs = set(), []
5384
5385 def init(self, args=None, filename=None):
5386 assert not self.__initialized
5387 directory = ''
5388 if filename:
5389 location = os.path.realpath(filename)
5390 directory = os.path.dirname(location)
5391 if location in self._loaded_paths:
5392 return False
5393 self._loaded_paths.add(location)
5394
5395 self.own_args, self.__initialized = args, True
5396 opts, _ = self.parser.parse_known_args(args)
5397 self.parsed_args, self.filename = args, filename
5398
5399 for location in opts.config_locations or []:
5400 if location == '-':
5401 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5402 continue
5403 location = os.path.join(directory, expand_path(location))
5404 if os.path.isdir(location):
5405 location = os.path.join(location, 'yt-dlp.conf')
5406 if not os.path.exists(location):
5407 self.parser.error(f'config location {location} does not exist')
5408 self.append_config(self.read_file(location), location)
5409 return True
5410
5411 def __str__(self):
5412 label = join_nonempty(
5413 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5414 delim=' ')
5415 return join_nonempty(
5416 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5417 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5418 delim='\n')
5419
5420 @staticmethod
5421 def read_file(filename, default=[]):
5422 try:
5423 optionf = open(filename)
5424 except OSError:
5425 return default # silently skip if file is not present
5426 try:
5427 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5428 contents = optionf.read()
5429 res = shlex.split(contents, comments=True)
5430 finally:
5431 optionf.close()
5432 return res
5433
5434 @staticmethod
5435 def hide_login_info(opts):
5436 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5437 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5438
5439 def _scrub_eq(o):
5440 m = eqre.match(o)
5441 if m:
5442 return m.group('key') + '=PRIVATE'
5443 else:
5444 return o
5445
5446 opts = list(map(_scrub_eq, opts))
5447 for idx, opt in enumerate(opts):
5448 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5449 opts[idx + 1] = 'PRIVATE'
5450 return opts
5451
5452 def append_config(self, *args, label=None):
5453 config = type(self)(self.parser, label)
5454 config._loaded_paths = self._loaded_paths
5455 if config.init(*args):
5456 self.configs.append(config)
5457
5458 @property
5459 def all_args(self):
5460 for config in reversed(self.configs):
5461 yield from config.all_args
5462 yield from self.parsed_args or []
5463
5464 def parse_known_args(self, **kwargs):
5465 return self.parser.parse_known_args(self.all_args, **kwargs)
5466
5467 def parse_args(self):
5468 return self.parser.parse_args(self.all_args)
5469
5470
5471 class WebSocketsWrapper():
5472 """Wraps websockets module to use in non-async scopes"""
5473 pool = None
5474
5475 def __init__(self, url, headers=None, connect=True):
5476 self.loop = asyncio.new_event_loop()
5477 # XXX: "loop" is deprecated
5478 self.conn = websockets.connect(
5479 url, extra_headers=headers, ping_interval=None,
5480 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5481 if connect:
5482 self.__enter__()
5483 atexit.register(self.__exit__, None, None, None)
5484
5485 def __enter__(self):
5486 if not self.pool:
5487 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5488 return self
5489
5490 def send(self, *args):
5491 self.run_with_loop(self.pool.send(*args), self.loop)
5492
5493 def recv(self, *args):
5494 return self.run_with_loop(self.pool.recv(*args), self.loop)
5495
5496 def __exit__(self, type, value, traceback):
5497 try:
5498 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5499 finally:
5500 self._cancel_all_tasks(self.loop)  # must run before close(); it calls run_until_complete(), which raises on a closed loop
5501 self.loop.close()
5502
5503 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5504 # for contributors: If any new library that uses asyncio needs to be run in non-async scopes, move these functions out of this class
5505 @staticmethod
5506 def run_with_loop(main, loop):
5507 if not asyncio.iscoroutine(main):
5508 raise ValueError(f'a coroutine was expected, got {main!r}')
5509
5510 try:
5511 return loop.run_until_complete(main)
5512 finally:
5513 loop.run_until_complete(loop.shutdown_asyncgens())
5514 if hasattr(loop, 'shutdown_default_executor'):
5515 loop.run_until_complete(loop.shutdown_default_executor())
5516
5517 @staticmethod
5518 def _cancel_all_tasks(loop):
5519 to_cancel = asyncio.all_tasks(loop)
5520
5521 if not to_cancel:
5522 return
5523
5524 for task in to_cancel:
5525 task.cancel()
5526
5527 # XXX: "loop" is removed in python 3.10+
5528 loop.run_until_complete(
5529 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5530
5531 for task in to_cancel:
5532 if task.cancelled():
5533 continue
5534 if task.exception() is not None:
5535 loop.call_exception_handler({
5536 'message': 'unhandled exception during asyncio.run() shutdown',
5537 'exception': task.exception(),
5538 'task': task,
5539 })
5540
5541
5542 def merge_headers(*dicts):
5543 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5544 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5545
5546
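# Usage sketch for merge_headers (illustrative values):
#
#   merge_headers({'accept': '*/*'}, {'Accept': 'text/html'})
#   # ==> {'Accept': 'text/html'}  (keys are title-cased; later dicts win)
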
5547 class classproperty:
5548 """classmethod(property(func)) that works in py < 3.9"""
5549
5550 def __init__(self, func):
5551 functools.update_wrapper(self, func)
5552 self.func = func
5553
5554 def __get__(self, _, cls):
5555 return self.func(cls)
5556
5557
5558 class Namespace(types.SimpleNamespace):
5559 """Immutable namespace"""
5560
5561 def __iter__(self):
5562 return iter(self.__dict__.values())
5563
5564 @property
5565 def items_(self):
5566 return self.__dict__.items()
5567
5568
5569 # Deprecated
5570 has_certifi = bool(certifi)
5571 has_websockets = bool(websockets)