#!/usr/bin/env python3
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import types
import urllib.parse
import xml.etree.ElementTree
import zlib

from .compat import asyncio, functools  # isort: split
from .compat import (
    compat_chr,
    compat_cookiejar,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_http_client,
    compat_HTTPError,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
)
from .dependencies import brotli, certifi, websockets
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

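# Illustrative sketch of how xpath_with_ns expands prefixed paths (the
# prefix-to-URI mapping here is a made-up example, not part of the module):
#   >>> xpath_with_ns('ns:media/ns:title', {'ns': 'http://example.com/ns'})
#   '{http://example.com/ns}media/{http://example.com/ns}title'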

def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, **kwargs):
    retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html, **kwargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
413 """Return the content of the tag with the specified attribute in the passed HTML document"""
414 return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
415
416
417 def get_elements_html_by_attribute(*args, **kwargs):
418 """Return the html of the tag with the specified attribute in the passed HTML document"""
419 return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
420
421
422 def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
423 """
424 Return the text (content) and the html (whole) of the tag with the specified
425 attribute in the passed HTML document
426 """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
        (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
        \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(compat_HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        compat_HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')

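# A minimal usage sketch (made-up input); nested same-name tags are tracked by
# the tag stack above, so only the truly matching closing tag ends the scan:
#   >>> get_element_text_and_html_by_tag('span', '<div><span>hi</span></div>')
#   ('hi', '<span>hi</span>')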

class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs

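# A small usage sketch with a made-up element; valueless attributes come back
# as None and entities are decoded by the underlying HTMLParser:
#   >>> extract_attributes('<a href="foo.html" data-id=\'1\' disabled>')
#   {'href': 'foo.html', 'data-id': '1', 'disabled': None}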

def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()

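# Illustrative example (made-up snippet): <br> and </p><p> boundaries become
# newlines, remaining tags are stripped and entities decoded:
#   >>> clean_html('<p>foo</p><p>bar &amp; baz</p>')
#   'foo\nbar & baz'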

class LenientJSONDecoder(json.JSONDecoder):
    def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        super().__init__(*args, **kwargs)

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        if self.ignore_extra:
            return self.raw_decode(s.lstrip())[0]
        return super().decode(s)

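# Usage sketch: pass this class as `cls` to json.loads, which forwards the
# extra keyword arguments to the decoder; with ignore_extra, trailing garbage
# after the first JSON value is discarded (the input here is made up):
#   >>> json.loads('{"a": 1} trailing garbage', cls=LenientJSONDecoder, ignore_extra=True)
#   {'a': 1}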

def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp

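# Example (assuming the usual RFC 2822 shape that parsedate_tz accepts):
#   >>> timeconvert('Wed, 14 Jul 2021 12:00:00 +0000')
#   1626264000
# None is returned for strings parsedate_tz cannot handle.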

def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result

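# A couple of illustrative calls (made-up titles; outputs follow the
# replacement rules above):
#   >>> sanitize_filename('Foo: Bar', restricted=False)
#   'Foo - Bar'
#   >>> sanitize_filename('Foo: Bar?', restricted=True)
#   'Foo_-_Bar'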

def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return
    elif url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url

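# Examples of the fixups above (inputs are made up):
#   >>> sanitize_url('//example.com/video')
#   'http://example.com/video'
#   >>> sanitize_url('httpss://example.com')
#   'https://example.com'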

def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'

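# Usage sketch (made-up credentials): the userinfo part is stripped from the
# URL and returned as a ready-to-use Authorization header value:
#   >>> extract_basic_auth('http://user:pass@example.com/feed')
#   ('http://example.com/feed', 'Basic dXNlcjpwYXNz')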

def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _iter():
        seen = []  # Do not use set since the items can be unhashable
        for x in iterable:
            if x not in seen:
                seen.append(x)
                yield x

    return _iter() if lazy else list(_iter())

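# Order-preserving deduplication; with lazy=True a generator is returned
# instead of a list (toy input for illustration):
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]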

def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)

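# Named, decimal and hexadecimal references are all handled (toy input):
#   >>> unescapeHTML('&amp; &#38; &#x26; &eacute;')
#   '& & & é'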

def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
                 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, text=False, **kwargs):
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, **kwargs):
        with cls(*args, **kwargs) as proc:
            stdout, stderr = proc.communicate_or_kill()
            return stdout or '', stderr or '', proc.returncode

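# Usage sketch for the convenience classmethod (the pipes here are an
# assumption of this example, not a default of Popen.run itself):
#   stdout, stderr, returncode = Popen.run(
#       [sys.executable, '-c', 'print("hi")'],
#       text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#   # stdout == 'hi\n', returncode == 0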

def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret

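# Sketch of the two helpers on a toy value: 3661.5 seconds is 1h 1m 1.5s:
#   >>> timetuple_from_msec(3661500)
#   Time(hours=1, minutes=1, seconds=1, milliseconds=500)
#   >>> formatSeconds(3661.5, msec=True)
#   '1:01:01.500'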

def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
        context.set_ciphers('DEFAULT')

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()

    client_certfile = params.get('client_certificate')
    if client_certfile:
        try:
            context.load_cert_chain(
                client_certfile, keyfile=params.get('client_certificate_key'),
                password=params.get('client_certificate_password'))
        except ssl.SSLError:
            raise YoutubeDLError('Unable to load client certificate')

    # Some servers may reject requests if ALPN extension is not sent. See:
    # https://github.com/python/cpython/issues/85140
    # https://github.com/yt-dlp/yt-dlp/issues/3878
    with contextlib.suppress(NotImplementedError):
        context.set_alpn_protocols(['http/1.1'])

    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
           'filling out the appropriate issue template. '
           'Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, None, '[%s] '),
            format_field(video_id, None, '%s: '),
            msg,
            format_field(cause, None, ' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
        # always respected by websites: some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode()
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if isinstance(self.timeout, (int, float)):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        try:
            return self.do_open(
                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
        except urllib.error.URLError as e:
            if (isinstance(e.reason, ssl.SSLError)
                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
            raise


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if self.is_path(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @staticmethod
    def is_path(file):
        return isinstance(file, (str, bytes, os.PathLike))

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if self.is_path(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise compat_cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
1658 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1659
1660 # A 303 must either use GET or HEAD for subsequent request
1661 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1662 if code == 303 and m != 'HEAD':
1663 m = 'GET'
1664 # 301 and 302 redirects are commonly turned into a GET from a POST
1665 # for subsequent requests by browsers, so we'll do the same.
1666 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1667 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1668 if code in (301, 302) and m == 'POST':
1669 m = 'GET'
1670
1671 return compat_urllib_request.Request(
1672 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1673 unverifiable=True, method=m)
1674
1675
1676 def extract_timezone(date_str):
1677 m = re.search(
1678 r'''(?x)
1679 ^.{8,}? # >=8 char non-TZ prefix, if present
1680 (?P<tz>Z| # just the UTC Z, or
1681 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1682 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1683 [ ]? # optional space
1684 (?P<sign>\+|-) # +/-
1685 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1686 $)
1687 ''', date_str)
1688 if not m:
1689 timezone = datetime.timedelta()
1690 else:
1691 date_str = date_str[:-len(m.group('tz'))]
1692 if not m.group('sign'):
1693 timezone = datetime.timedelta()
1694 else:
1695 sign = 1 if m.group('sign') == '+' else -1
1696 timezone = datetime.timedelta(
1697 hours=sign * int(m.group('hours')),
1698 minutes=sign * int(m.group('minutes')))
1699 return timezone, date_str
1700
1701
1702 def parse_iso8601(date_str, delimiter='T', timezone=None):
1703 """ Return a UNIX timestamp from the given date """
1704
1705 if date_str is None:
1706 return None
1707
1708 date_str = re.sub(r'\.[0-9]+', '', date_str)
1709
1710 if timezone is None:
1711 timezone, date_str = extract_timezone(date_str)
1712
1713 with contextlib.suppress(ValueError):
1714 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1715 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1716 return calendar.timegm(dt.timetuple())
1717
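# Illustrative examples (not part of the original source); note that
# fractional seconds are stripped before parsing:
#   parse_iso8601('2020-01-01T00:00:00Z') == 1577836800
#   parse_iso8601('2020-01-01T09:00:00.123+09:00') == 1577836800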
1718
1719 def date_formats(day_first=True):
1720 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1721
1722
1723 def unified_strdate(date_str, day_first=True):
1724 """Return a string with the date in the format YYYYMMDD"""
1725
1726 if date_str is None:
1727 return None
1728 upload_date = None
1729 # Replace commas
1730 date_str = date_str.replace(',', ' ')
1731 # Remove AM/PM + timezone
1732 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1733 _, date_str = extract_timezone(date_str)
1734
1735 for expression in date_formats(day_first):
1736 with contextlib.suppress(ValueError):
1737 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1738 if upload_date is None:
1739 timetuple = email.utils.parsedate_tz(date_str)
1740 if timetuple:
1741 with contextlib.suppress(ValueError):
1742 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1743 if upload_date is not None:
1744 return compat_str(upload_date)
1745
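# Illustrative examples (these rely on the DATE_FORMATS* tables defined
# earlier in this module):
#   unified_strdate('Dec 14, 2012') == '20121214'
#   unified_strdate('1968-12-10') == '19681210'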
1746
1747 def unified_timestamp(date_str, day_first=True):
1748 if date_str is None:
1749 return None
1750
1751 date_str = re.sub(r'[,|]', '', date_str)
1752
1753 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1754 timezone, date_str = extract_timezone(date_str)
1755
1756 # Remove AM/PM + timezone
1757 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1758
1759 # Remove unrecognized timezones from ISO 8601 alike timestamps
1760 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1761 if m:
1762 date_str = date_str[:-len(m.group('tz'))]
1763
1764 # Python only supports microseconds, so remove nanoseconds
1765 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1766 if m:
1767 date_str = m.group(1)
1768
1769 for expression in date_formats(day_first):
1770 with contextlib.suppress(ValueError):
1771 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1772 return calendar.timegm(dt.timetuple())
1773 timetuple = email.utils.parsedate_tz(date_str)
1774 if timetuple:
1775 return calendar.timegm(timetuple) + pm_delta * 3600
1776
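# Illustrative examples (again driven by the DATE_FORMATS* tables):
#   unified_timestamp('December 15, 2017 at 7:49 am') == 1513324140
#   unified_timestamp('2018-03-14T08:32:43.1493874+00:00') == 1521016363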
1777
1778 def determine_ext(url, default_ext='unknown_video'):
1779 if url is None or '.' not in url:
1780 return default_ext
1781 guess = url.partition('?')[0].rpartition('.')[2]
1782 if re.match(r'^[A-Za-z0-9]+$', guess):
1783 return guess
1784 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1785 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1786 return guess.rstrip('/')
1787 else:
1788 return default_ext
1789
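# Illustrative examples ('mp4' is assumed to be in KNOWN_EXTENSIONS,
# which is defined elsewhere in this module):
#   determine_ext('http://example.com/video.mp4?download=1') == 'mp4'
#   determine_ext('http://example.com/foo/bar.mp4/?download') == 'mp4'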
1790
1791 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1792 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1793
1794
1795 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1796 R"""
1797 Return a datetime object from a string.
1798 Supported format:
1799 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1800
1801 @param format strftime format of DATE
1802 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1803 auto: round to the unit provided in date_str (if applicable).
1804 """
1805 auto_precision = False
1806 if precision == 'auto':
1807 auto_precision = True
1808 precision = 'microsecond'
1809 today = datetime_round(datetime.datetime.utcnow(), precision)
1810 if date_str in ('now', 'today'):
1811 return today
1812 if date_str == 'yesterday':
1813 return today - datetime.timedelta(days=1)
1814 match = re.match(
1815 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1816 date_str)
1817 if match is not None:
1818 start_time = datetime_from_str(match.group('start'), precision, format)
1819 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1820 unit = match.group('unit')
1821 if unit == 'month' or unit == 'year':
1822 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1823 unit = 'day'
1824 else:
1825 if unit == 'week':
1826 unit = 'day'
1827 time *= 7
1828 delta = datetime.timedelta(**{unit + 's': time})
1829 new_date = start_time + delta
1830 if auto_precision:
1831 return datetime_round(new_date, unit)
1832 return new_date
1833
1834 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1835
1836
1837 def date_from_str(date_str, format='%Y%m%d', strict=False):
1838 R"""
1839 Return a date object from a string using datetime_from_str
1840
1841 @param strict Restrict allowed patterns to "YYYYMMDD" and
1842 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1843 """
1844 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1845 raise ValueError(f'Invalid date format "{date_str}"')
1846 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1847
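# Illustrative examples (relative dates are computed from UTC "now"):
#   date_from_str('20220315') == datetime.date(2022, 3, 15)
#   date_from_str('now-1week') -> the calendar date 7 days ago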
1848
1849 def datetime_add_months(dt, months):
1850 """Increment/Decrement a datetime object by months."""
1851 month = dt.month + months - 1
1852 year = dt.year + month // 12
1853 month = month % 12 + 1
1854 day = min(dt.day, calendar.monthrange(year, month)[1])
1855 return dt.replace(year, month, day)
1856
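# Illustrative example - the day is clamped to the length of the target month:
#   datetime_add_months(datetime.datetime(2022, 1, 31), 1)
#   == datetime.datetime(2022, 2, 28)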
1857
1858 def datetime_round(dt, precision='day'):
1859 """
1860 Round a datetime object's time to a specific precision
1861 """
1862 if precision == 'microsecond':
1863 return dt
1864
1865 unit_seconds = {
1866 'day': 86400,
1867 'hour': 3600,
1868 'minute': 60,
1869 'second': 1,
1870 }
1871 roundto = lambda x, n: ((x + n / 2) // n) * n
1872 timestamp = calendar.timegm(dt.timetuple())
1873 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1874
1875
1876 def hyphenate_date(date_str):
1877 """
1878 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1879 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1880 if match is not None:
1881 return '-'.join(match.groups())
1882 else:
1883 return date_str
1884
1885
1886 class DateRange:
1887 """Represents a time interval between two dates"""
1888
1889 def __init__(self, start=None, end=None):
1890 """start and end must be strings in the format accepted by date"""
1891 if start is not None:
1892 self.start = date_from_str(start, strict=True)
1893 else:
1894 self.start = datetime.datetime.min.date()
1895 if end is not None:
1896 self.end = date_from_str(end, strict=True)
1897 else:
1898 self.end = datetime.datetime.max.date()
1899 if self.start > self.end:
1900 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
1901
1902 @classmethod
1903 def day(cls, day):
1904 """Returns a range that only contains the given day"""
1905 return cls(day, day)
1906
1907 def __contains__(self, date):
1908 """Check if the date is in the range"""
1909 if not isinstance(date, datetime.date):
1910 date = date_from_str(date)
1911 return self.start <= date <= self.end
1912
1913 def __str__(self):
1914 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1915
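# Illustrative usage:
#   '20220315' in DateRange('20220301', '20220331')  # True
#   '20220401' in DateRange.day('20220315')  # False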
1916
1917 def platform_name():
1918 """ Returns the platform name as a compat_str """
1919 res = platform.platform()
1920 if isinstance(res, bytes):
1921 res = res.decode(preferredencoding())
1922
1923 assert isinstance(res, compat_str)
1924 return res
1925
1926
1927 @functools.cache
1928 def get_windows_version():
1929 ''' Get the Windows version. Returns () if not running on Windows '''
1930 if compat_os_name == 'nt':
1931 return version_tuple(platform.win32_ver()[1])
1932 else:
1933 return ()
1934
1935
1936 def write_string(s, out=None, encoding=None):
1937 assert isinstance(s, str)
1938 out = out or sys.stderr
1939
1940 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1941 s = re.sub(r'([\r\n]+)', r' \1', s)
1942
1943 enc, buffer = None, out
1944 if 'b' in getattr(out, 'mode', ''):
1945 enc = encoding or preferredencoding()
1946 elif hasattr(out, 'buffer'):
1947 buffer = out.buffer
1948 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1949
1950 buffer.write(s.encode(enc, 'ignore') if enc else s)
1951 out.flush()
1952
1953
1954 def bytes_to_intlist(bs):
1955 if not bs:
1956 return []
1957 if isinstance(bs[0], int): # Python 3
1958 return list(bs)
1959 else:
1960 return [ord(c) for c in bs]
1961
1962
1963 def intlist_to_bytes(xs):
1964 if not xs:
1965 return b''
1966 return compat_struct_pack('%dB' % len(xs), *xs)
1967
1968
1969 class LockingUnsupportedError(OSError):
1970 msg = 'File locking is not supported'
1971
1972 def __init__(self):
1973 super().__init__(self.msg)
1974
1975
1976 # Cross-platform file locking
1977 if sys.platform == 'win32':
1978 import ctypes.wintypes
1979 import msvcrt
1980
1981 class OVERLAPPED(ctypes.Structure):
1982 _fields_ = [
1983 ('Internal', ctypes.wintypes.LPVOID),
1984 ('InternalHigh', ctypes.wintypes.LPVOID),
1985 ('Offset', ctypes.wintypes.DWORD),
1986 ('OffsetHigh', ctypes.wintypes.DWORD),
1987 ('hEvent', ctypes.wintypes.HANDLE),
1988 ]
1989
1990 kernel32 = ctypes.windll.kernel32
1991 LockFileEx = kernel32.LockFileEx
1992 LockFileEx.argtypes = [
1993 ctypes.wintypes.HANDLE, # hFile
1994 ctypes.wintypes.DWORD, # dwFlags
1995 ctypes.wintypes.DWORD, # dwReserved
1996 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1997 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1998 ctypes.POINTER(OVERLAPPED) # Overlapped
1999 ]
2000 LockFileEx.restype = ctypes.wintypes.BOOL
2001 UnlockFileEx = kernel32.UnlockFileEx
2002 UnlockFileEx.argtypes = [
2003 ctypes.wintypes.HANDLE, # hFile
2004 ctypes.wintypes.DWORD, # dwReserved
2005 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2006 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2007 ctypes.POINTER(OVERLAPPED) # Overlapped
2008 ]
2009 UnlockFileEx.restype = ctypes.wintypes.BOOL
2010 whole_low = 0xffffffff
2011 whole_high = 0x7fffffff
2012
2013 def _lock_file(f, exclusive, block):
2014 overlapped = OVERLAPPED()
2015 overlapped.Offset = 0
2016 overlapped.OffsetHigh = 0
2017 overlapped.hEvent = 0
2018 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2019
2020 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2021 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2022 0, whole_low, whole_high, f._lock_file_overlapped_p):
2023 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2024 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2025
2026 def _unlock_file(f):
2027 assert f._lock_file_overlapped_p
2028 handle = msvcrt.get_osfhandle(f.fileno())
2029 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2030 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2031
2032 else:
2033 try:
2034 import fcntl
2035
2036 def _lock_file(f, exclusive, block):
2037 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2038 if not block:
2039 flags |= fcntl.LOCK_NB
2040 try:
2041 fcntl.flock(f, flags)
2042 except BlockingIOError:
2043 raise
2044 except OSError: # AOSP does not have flock()
2045 fcntl.lockf(f, flags)
2046
2047 def _unlock_file(f):
2048 try:
2049 fcntl.flock(f, fcntl.LOCK_UN)
2050 except OSError:
2051 fcntl.lockf(f, fcntl.LOCK_UN)
2052
2053 except ImportError:
2054
2055 def _lock_file(f, exclusive, block):
2056 raise LockingUnsupportedError()
2057
2058 def _unlock_file(f):
2059 raise LockingUnsupportedError()
2060
2061
2062 class locked_file:
2063 locked = False
2064
2065 def __init__(self, filename, mode, block=True, encoding=None):
2066 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2067 raise NotImplementedError(mode)
2068 self.mode, self.block = mode, block
2069
2070 writable = any(f in mode for f in 'wax+')
2071 readable = any(f in mode for f in 'r+')
2072 flags = functools.reduce(operator.ior, (
2073 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2074 getattr(os, 'O_BINARY', 0), # Windows only
2075 getattr(os, 'O_NOINHERIT', 0), # Windows only
2076 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2077 os.O_APPEND if 'a' in mode else 0,
2078 os.O_EXCL if 'x' in mode else 0,
2079 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2080 ))
2081
2082 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2083
2084 def __enter__(self):
2085 exclusive = 'r' not in self.mode
2086 try:
2087 _lock_file(self.f, exclusive, self.block)
2088 self.locked = True
2089 except OSError:
2090 self.f.close()
2091 raise
2092 if 'w' in self.mode:
2093 try:
2094 self.f.truncate()
2095 except OSError as e:
2096 if e.errno not in (
2097 errno.ESPIPE, # Illegal seek - expected for FIFO
2098 errno.EINVAL, # Invalid argument - expected for /dev/null
2099 ):
2100 raise
2101 return self
2102
2103 def unlock(self):
2104 if not self.locked:
2105 return
2106 try:
2107 _unlock_file(self.f)
2108 finally:
2109 self.locked = False
2110
2111 def __exit__(self, *_):
2112 try:
2113 self.unlock()
2114 finally:
2115 self.f.close()
2116
2117 open = __enter__
2118 close = __exit__
2119
2120 def __getattr__(self, attr):
2121 return getattr(self.f, attr)
2122
2123 def __iter__(self):
2124 return iter(self.f)
2125
2126
2127 @functools.cache
2128 def get_filesystem_encoding():
2129 encoding = sys.getfilesystemencoding()
2130 return encoding if encoding is not None else 'utf-8'
2131
2132
2133 def shell_quote(args):
2134 quoted_args = []
2135 encoding = get_filesystem_encoding()
2136 for a in args:
2137 if isinstance(a, bytes):
2138 # We may get a filename encoded with 'encodeFilename'
2139 a = a.decode(encoding)
2140 quoted_args.append(compat_shlex_quote(a))
2141 return ' '.join(quoted_args)
2142
2143
2144 def smuggle_url(url, data):
2145 """ Pass additional data in a URL for internal use. """
2146
2147 url, idata = unsmuggle_url(url, {})
2148 data.update(idata)
2149 sdata = compat_urllib_parse_urlencode(
2150 {'__youtubedl_smuggle': json.dumps(data)})
2151 return url + '#' + sdata
2152
2153
2154 def unsmuggle_url(smug_url, default=None):
2155 if '#__youtubedl_smuggle' not in smug_url:
2156 return smug_url, default
2157 url, _, sdata = smug_url.rpartition('#')
2158 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2159 data = json.loads(jsond)
2160 return url, data
2161
2162
2163 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2164 """ Formats numbers with decimal sufixes like K, M, etc """
2165 num, factor = float_or_none(num), float(factor)
2166 if num is None or num < 0:
2167 return None
2168 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2169 exponent = 0 if num == 0 else max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))  # clamp: log() < 0 when 0 < num < 1
2170 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2171 if factor == 1024:
2172 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2173 converted = num / (factor ** exponent)
2174 return fmt % (converted, suffix)
2175
2176
2177 def format_bytes(bytes):
2178 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2179
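# Illustrative examples:
#   format_decimal_suffix(1500000) == '1M'  # default fmt '%d%s', factor 1000
#   format_bytes(1024 ** 2) == '1.00MiB'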
2180
2181 def lookup_unit_table(unit_table, s):
2182 units_re = '|'.join(re.escape(u) for u in unit_table)
2183 m = re.match(
2184 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2185 if not m:
2186 return None
2187 num_str = m.group('num').replace(',', '.')
2188 mult = unit_table[m.group('unit')]
2189 return int(float(num_str) * mult)
2190
2191
2192 def parse_filesize(s):
2193 if s is None:
2194 return None
2195
2196 # The lower-case forms are of course incorrect and unofficial,
2197 # but we support those too
2198 _UNIT_TABLE = {
2199 'B': 1,
2200 'b': 1,
2201 'bytes': 1,
2202 'KiB': 1024,
2203 'KB': 1000,
2204 'kB': 1024,
2205 'Kb': 1000,
2206 'kb': 1000,
2207 'kilobytes': 1000,
2208 'kibibytes': 1024,
2209 'MiB': 1024 ** 2,
2210 'MB': 1000 ** 2,
2211 'mB': 1024 ** 2,
2212 'Mb': 1000 ** 2,
2213 'mb': 1000 ** 2,
2214 'megabytes': 1000 ** 2,
2215 'mebibytes': 1024 ** 2,
2216 'GiB': 1024 ** 3,
2217 'GB': 1000 ** 3,
2218 'gB': 1024 ** 3,
2219 'Gb': 1000 ** 3,
2220 'gb': 1000 ** 3,
2221 'gigabytes': 1000 ** 3,
2222 'gibibytes': 1024 ** 3,
2223 'TiB': 1024 ** 4,
2224 'TB': 1000 ** 4,
2225 'tB': 1024 ** 4,
2226 'Tb': 1000 ** 4,
2227 'tb': 1000 ** 4,
2228 'terabytes': 1000 ** 4,
2229 'tebibytes': 1024 ** 4,
2230 'PiB': 1024 ** 5,
2231 'PB': 1000 ** 5,
2232 'pB': 1024 ** 5,
2233 'Pb': 1000 ** 5,
2234 'pb': 1000 ** 5,
2235 'petabytes': 1000 ** 5,
2236 'pebibytes': 1024 ** 5,
2237 'EiB': 1024 ** 6,
2238 'EB': 1000 ** 6,
2239 'eB': 1024 ** 6,
2240 'Eb': 1000 ** 6,
2241 'eb': 1000 ** 6,
2242 'exabytes': 1000 ** 6,
2243 'exbibytes': 1024 ** 6,
2244 'ZiB': 1024 ** 7,
2245 'ZB': 1000 ** 7,
2246 'zB': 1024 ** 7,
2247 'Zb': 1000 ** 7,
2248 'zb': 1000 ** 7,
2249 'zettabytes': 1000 ** 7,
2250 'zebibytes': 1024 ** 7,
2251 'YiB': 1024 ** 8,
2252 'YB': 1000 ** 8,
2253 'yB': 1024 ** 8,
2254 'Yb': 1000 ** 8,
2255 'yb': 1000 ** 8,
2256 'yottabytes': 1000 ** 8,
2257 'yobibytes': 1024 ** 8,
2258 }
2259
2260 return lookup_unit_table(_UNIT_TABLE, s)
2261
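# Illustrative examples (',' is accepted as a decimal separator):
#   parse_filesize('1.2MiB') == 1258291
#   parse_filesize('1,24 KB') == 1240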
2262
2263 def parse_count(s):
2264 if s is None:
2265 return None
2266
2267 s = re.sub(r'^[^\d]+\s', '', s).strip()
2268
2269 if re.match(r'^[\d,.]+$', s):
2270 return str_to_int(s)
2271
2272 _UNIT_TABLE = {
2273 'k': 1000,
2274 'K': 1000,
2275 'm': 1000 ** 2,
2276 'M': 1000 ** 2,
2277 'kk': 1000 ** 2,
2278 'KK': 1000 ** 2,
2279 'b': 1000 ** 3,
2280 'B': 1000 ** 3,
2281 }
2282
2283 ret = lookup_unit_table(_UNIT_TABLE, s)
2284 if ret is not None:
2285 return ret
2286
2287 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2288 if mobj:
2289 return str_to_int(mobj.group(1))
2290
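# Illustrative examples:
#   parse_count('1.5M') == 1500000
#   parse_count('1,234 views') == 1234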
2291
2292 def parse_resolution(s, *, lenient=False):
2293 if s is None:
2294 return {}
2295
2296 if lenient:
2297 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2298 else:
2299 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2300 if mobj:
2301 return {
2302 'width': int(mobj.group('w')),
2303 'height': int(mobj.group('h')),
2304 }
2305
2306 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2307 if mobj:
2308 return {'height': int(mobj.group(1))}
2309
2310 mobj = re.search(r'\b([48])[kK]\b', s)
2311 if mobj:
2312 return {'height': int(mobj.group(1)) * 540}
2313
2314 return {}
2315
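# Illustrative examples:
#   parse_resolution('1920x1080') == {'width': 1920, 'height': 1080}
#   parse_resolution('720p') == {'height': 720}
#   parse_resolution('4k') == {'height': 2160}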
2316
2317 def parse_bitrate(s):
2318 if not isinstance(s, compat_str):
2319 return
2320 mobj = re.search(r'\b(\d+)\s*kbps', s)
2321 if mobj:
2322 return int(mobj.group(1))
2323
2324
2325 def month_by_name(name, lang='en'):
2326 """ Return the number of a month by (locale-independently) English name """
2327
2328 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2329
2330 try:
2331 return month_names.index(name) + 1
2332 except ValueError:
2333 return None
2334
2335
2336 def month_by_abbreviation(abbrev):
2337 """ Return the number of a month by (locale-independently) English
2338 abbreviations """
2339
2340 try:
2341 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2342 except ValueError:
2343 return None
2344
2345
2346 def fix_xml_ampersands(xml_str):
2347 """Replace all the '&' by '&amp;' in XML"""
2348 return re.sub(
2349 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2350 '&amp;',
2351 xml_str)
2352
2353
2354 def setproctitle(title):
2355 assert isinstance(title, compat_str)
2356
2357 # ctypes in Jython is not complete
2358 # http://bugs.jython.org/issue2148
2359 if sys.platform.startswith('java'):
2360 return
2361
2362 try:
2363 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2364 except OSError:
2365 return
2366 except TypeError:
2367 # LoadLibrary in Windows Python 2.7.13 only expects
2368 # a bytestring, but since unicode_literals turns
2369 # every string into a unicode string, it fails.
2370 return
2371 title_bytes = title.encode()
2372 buf = ctypes.create_string_buffer(len(title_bytes))
2373 buf.value = title_bytes
2374 try:
2375 libc.prctl(15, buf, 0, 0, 0)
2376 except AttributeError:
2377 return # Strange libc, just skip this
2378
2379
2380 def remove_start(s, start):
2381 return s[len(start):] if s is not None and s.startswith(start) else s
2382
2383
2384 def remove_end(s, end):
2385 return s[:-len(end)] if s is not None and s.endswith(end) else s
2386
2387
2388 def remove_quotes(s):
2389 if s is None or len(s) < 2:
2390 return s
2391 for quote in ('"', "'", ):
2392 if s[0] == quote and s[-1] == quote:
2393 return s[1:-1]
2394 return s
2395
2396
2397 def get_domain(url):
2398 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2399 return domain.group('domain') if domain else None
2400
2401
2402 def url_basename(url):
2403 path = compat_urlparse.urlparse(url).path
2404 return path.strip('/').split('/')[-1]
2405
2406
2407 def base_url(url):
2408 return re.match(r'https?://[^?#&]+/', url).group()
2409
2410
2411 def urljoin(base, path):
2412 if isinstance(path, bytes):
2413 path = path.decode()
2414 if not isinstance(path, compat_str) or not path:
2415 return None
2416 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
2417 return path
2418 if isinstance(base, bytes):
2419 base = base.decode()
2420 if not isinstance(base, compat_str) or not re.match(
2421 r'^(?:https?:)?//', base):
2422 return None
2423 return compat_urlparse.urljoin(base, path)
2424
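# Illustrative examples:
#   urljoin('https://example.com/a/', 'b/c') == 'https://example.com/a/b/c'
#   urljoin('https://example.com/a', '//cdn.example.com/x') == '//cdn.example.com/x'
#   urljoin('ftp://example.com/', 'x') is None  # base must be http(s)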
2425
2426 class HEADRequest(compat_urllib_request.Request):
2427 def get_method(self):
2428 return 'HEAD'
2429
2430
2431 class PUTRequest(compat_urllib_request.Request):
2432 def get_method(self):
2433 return 'PUT'
2434
2435
2436 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2437 if get_attr and v is not None:
2438 v = getattr(v, get_attr, None)
2439 try:
2440 return int(v) * invscale // scale
2441 except (ValueError, TypeError, OverflowError):
2442 return default
2443
2444
2445 def str_or_none(v, default=None):
2446 return default if v is None else compat_str(v)
2447
2448
2449 def str_to_int(int_str):
2450 """ A more relaxed version of int_or_none """
2451 if isinstance(int_str, int):
2452 return int_str
2453 elif isinstance(int_str, compat_str):
2454 int_str = re.sub(r'[,\.\+]', '', int_str)
2455 return int_or_none(int_str)
2456
2457
2458 def float_or_none(v, scale=1, invscale=1, default=None):
2459 if v is None:
2460 return default
2461 try:
2462 return float(v) * invscale / scale
2463 except (ValueError, TypeError):
2464 return default
2465
2466
2467 def bool_or_none(v, default=None):
2468 return v if isinstance(v, bool) else default
2469
2470
2471 def strip_or_none(v, default=None):
2472 return v.strip() if isinstance(v, compat_str) else default
2473
2474
2475 def url_or_none(url):
2476 if not url or not isinstance(url, compat_str):
2477 return None
2478 url = url.strip()
2479 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2480
2481
2482 def request_to_url(req):
2483 if isinstance(req, compat_urllib_request.Request):
2484 return req.get_full_url()
2485 else:
2486 return req
2487
2488
2489 def strftime_or_none(timestamp, date_format, default=None):
2490 datetime_object = None
2491 try:
2492 if isinstance(timestamp, (int, float)): # unix timestamp
2493 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2494 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2495 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2496 return datetime_object.strftime(date_format)
2497 except (ValueError, TypeError, AttributeError):
2498 return default
2499
2500
2501 def parse_duration(s):
2502 if not isinstance(s, str):
2503 return None
2504 s = s.strip()
2505 if not s:
2506 return None
2507
2508 days, hours, mins, secs, ms = [None] * 5
2509 m = re.match(r'''(?x)
2510 (?P<before_secs>
2511 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2512 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2513 (?P<ms>[.:][0-9]+)?Z?$
2514 ''', s)
2515 if m:
2516 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2517 else:
2518 m = re.match(
2519 r'''(?ix)(?:P?
2520 (?:
2521 [0-9]+\s*y(?:ears?)?,?\s*
2522 )?
2523 (?:
2524 [0-9]+\s*m(?:onths?)?,?\s*
2525 )?
2526 (?:
2527 [0-9]+\s*w(?:eeks?)?,?\s*
2528 )?
2529 (?:
2530 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2531 )?
2532 T)?
2533 (?:
2534 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2535 )?
2536 (?:
2537 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2538 )?
2539 (?:
2540 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2541 )?Z?$''', s)
2542 if m:
2543 days, hours, mins, secs, ms = m.groups()
2544 else:
2545 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2546 if m:
2547 hours, mins = m.groups()
2548 else:
2549 return None
2550
2551 if ms:
2552 ms = ms.replace(':', '.')
2553 return sum(float(part or 0) * mult for part, mult in (
2554 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2555
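# Illustrative examples (the result is a float number of seconds):
#   parse_duration('1:20:35') == 4835
#   parse_duration('PT2H30M') == 9000
#   parse_duration('3 min') == 180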
2556
2557 def prepend_extension(filename, ext, expected_real_ext=None):
2558 name, real_ext = os.path.splitext(filename)
2559 return (
2560 f'{name}.{ext}{real_ext}'
2561 if not expected_real_ext or real_ext[1:] == expected_real_ext
2562 else f'{filename}.{ext}')
2563
2564
2565 def replace_extension(filename, ext, expected_real_ext=None):
2566 name, real_ext = os.path.splitext(filename)
2567 return '{}.{}'.format(
2568 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2569 ext)
2570
2571
2572 def check_executable(exe, args=[]):
2573 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2574 args can be a list of arguments for a short output (like -version) """
2575 try:
2576 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2577 except OSError:
2578 return False
2579 return exe
2580
2581
2582 def _get_exe_version_output(exe, args, *, to_screen=None):
2583 if to_screen:
2584 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2585 try:
2586 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2587 # SIGTTOU if yt-dlp is run in the background.
2588 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2589 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2590 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2591 except OSError:
2592 return False
2593 return stdout
2594
2595
2596 def detect_exe_version(output, version_re=None, unrecognized='present'):
2597 assert isinstance(output, compat_str)
2598 if version_re is None:
2599 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2600 m = re.search(version_re, output)
2601 if m:
2602 return m.group(1)
2603 else:
2604 return unrecognized
2605
2606
2607 def get_exe_version(exe, args=['--version'],
2608 version_re=None, unrecognized='present'):
2609 """ Returns the version of the specified executable,
2610 or False if the executable is not present """
2611 out = _get_exe_version_output(exe, args)
2612 return detect_exe_version(out, version_re, unrecognized) if out else False
2613
2614
2615 def frange(start=0, stop=None, step=1):
2616 """Float range"""
2617 if stop is None:
2618 start, stop = 0, start
2619 sign = [-1, 1][step > 0] if step else 0
2620 while sign * start < sign * stop:
2621 yield start
2622 start += step
2623
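# Illustrative examples:
#   list(frange(3)) == [0, 1, 2]
#   list(frange(0, 1, 0.25)) == [0, 0.25, 0.5, 0.75]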
2624
2625 class LazyList(collections.abc.Sequence):
2626 """Lazy immutable list from an iterable
2627 Note that slices of a LazyList are lists and not LazyList"""
2628
2629 class IndexError(IndexError):
2630 pass
2631
2632 def __init__(self, iterable, *, reverse=False, _cache=None):
2633 self._iterable = iter(iterable)
2634 self._cache = [] if _cache is None else _cache
2635 self._reversed = reverse
2636
2637 def __iter__(self):
2638 if self._reversed:
2639 # We need to consume the entire iterable to iterate in reverse
2640 yield from self.exhaust()
2641 return
2642 yield from self._cache
2643 for item in self._iterable:
2644 self._cache.append(item)
2645 yield item
2646
2647 def _exhaust(self):
2648 self._cache.extend(self._iterable)
2649 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2650 return self._cache
2651
2652 def exhaust(self):
2653 """Evaluate the entire iterable"""
2654 return self._exhaust()[::-1 if self._reversed else 1]
2655
2656 @staticmethod
2657 def _reverse_index(x):
2658 return None if x is None else -(x + 1)
2659
2660 def __getitem__(self, idx):
2661 if isinstance(idx, slice):
2662 if self._reversed:
2663 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2664 start, stop, step = idx.start, idx.stop, idx.step or 1
2665 elif isinstance(idx, int):
2666 if self._reversed:
2667 idx = self._reverse_index(idx)
2668 start, stop, step = idx, idx, 0
2669 else:
2670 raise TypeError('indices must be integers or slices')
2671 if ((start or 0) < 0 or (stop or 0) < 0
2672 or (start is None and step < 0)
2673 or (stop is None and step > 0)):
2674 # We need to consume the entire iterable to be able to slice from the end
2675 # Obviously, never use this with infinite iterables
2676 self._exhaust()
2677 try:
2678 return self._cache[idx]
2679 except IndexError as e:
2680 raise self.IndexError(e) from e
2681 n = max(start or 0, stop or 0) - len(self._cache) + 1
2682 if n > 0:
2683 self._cache.extend(itertools.islice(self._iterable, n))
2684 try:
2685 return self._cache[idx]
2686 except IndexError as e:
2687 raise self.IndexError(e) from e
2688
2689 def __bool__(self):
2690 try:
2691 self[-1] if self._reversed else self[0]
2692 except self.IndexError:
2693 return False
2694 return True
2695
2696 def __len__(self):
2697 self._exhaust()
2698 return len(self._cache)
2699
2700 def __reversed__(self):
2701 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2702
2703 def __copy__(self):
2704 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2705
2706 def __repr__(self):
2707 # repr and str should mimic a list. So we exhaust the iterable
2708 return repr(self.exhaust())
2709
2710 def __str__(self):
2711 return repr(self.exhaust())
2712
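# Illustrative usage - items are pulled from the iterable only as needed:
#   lst = LazyList(map(str, range(10)))
#   lst[2] == '2'                # consumes items 0-2 only
#   lst[1:4] == ['1', '2', '3']  # slices are plain lists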
2713
2714 class PagedList:
2715
2716 class IndexError(IndexError):
2717 pass
2718
2719 def __len__(self):
2720 # This is only useful for tests
2721 return len(self.getslice())
2722
2723 def __init__(self, pagefunc, pagesize, use_cache=True):
2724 self._pagefunc = pagefunc
2725 self._pagesize = pagesize
2726 self._pagecount = float('inf')
2727 self._use_cache = use_cache
2728 self._cache = {}
2729
2730 def getpage(self, pagenum):
2731 page_results = self._cache.get(pagenum)
2732 if page_results is None:
2733 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2734 if self._use_cache:
2735 self._cache[pagenum] = page_results
2736 return page_results
2737
2738 def getslice(self, start=0, end=None):
2739 return list(self._getslice(start, end))
2740
2741 def _getslice(self, start, end):
2742 raise NotImplementedError('This method must be implemented by subclasses')
2743
2744 def __getitem__(self, idx):
2745 assert self._use_cache, 'Indexing PagedList requires cache'
2746 if not isinstance(idx, int) or idx < 0:
2747 raise TypeError('indices must be non-negative integers')
2748 entries = self.getslice(idx, idx + 1)
2749 if not entries:
2750 raise self.IndexError()
2751 return entries[0]
2752
2753
2754 class OnDemandPagedList(PagedList):
2755 """Download pages until a page with less than maximum results"""
2756
2757 def _getslice(self, start, end):
2758 for pagenum in itertools.count(start // self._pagesize):
2759 firstid = pagenum * self._pagesize
2760 nextfirstid = pagenum * self._pagesize + self._pagesize
2761 if start >= nextfirstid:
2762 continue
2763
2764 startv = (
2765 start % self._pagesize
2766 if firstid <= start < nextfirstid
2767 else 0)
2768 endv = (
2769 ((end - 1) % self._pagesize) + 1
2770 if (end is not None and firstid <= end <= nextfirstid)
2771 else None)
2772
2773 try:
2774 page_results = self.getpage(pagenum)
2775 except Exception:
2776 self._pagecount = pagenum - 1
2777 raise
2778 if startv != 0 or endv is not None:
2779 page_results = page_results[startv:endv]
2780 yield from page_results
2781
2782 # A little optimization - if the current page is not "full", i.e. does
2783 # not contain page_size videos, then we can assume that this page
2784 # is the last one - there are no more ids on further pages,
2785 # so there is no need to query again.
2786 if len(page_results) + startv < self._pagesize:
2787 break
2788
2789 # If we got the whole page, but the next page is not interesting,
2790 # break out early as well
2791 if end == nextfirstid:
2792 break
2793
2794
2795 class InAdvancePagedList(PagedList):
2796 """PagedList with total number of pages known in advance"""
2797
2798 def __init__(self, pagefunc, pagecount, pagesize):
2799 PagedList.__init__(self, pagefunc, pagesize, True)
2800 self._pagecount = pagecount
2801
2802 def _getslice(self, start, end):
2803 start_page = start // self._pagesize
2804 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2805 skip_elems = start - start_page * self._pagesize
2806 only_more = None if end is None else end - start
2807 for pagenum in range(start_page, end_page):
2808 page_results = self.getpage(pagenum)
2809 if skip_elems:
2810 page_results = page_results[skip_elems:]
2811 skip_elems = None
2812 if only_more is not None:
2813 if len(page_results) < only_more:
2814 only_more -= len(page_results)
2815 else:
2816 yield from page_results[:only_more]
2817 break
2818 yield from page_results
2819
2820
2821 class PlaylistEntries:
2822 MissingEntry = object()
2823 is_exhausted = False
2824
2825 def __init__(self, ydl, info_dict):
2826 self.ydl = ydl
2827
2828 # _entries must be assigned now since infodict can change during iteration
2829 entries = info_dict.get('entries')
2830 if entries is None:
2831 raise EntryNotInPlaylist('There are no entries')
2832 elif isinstance(entries, list):
2833 self.is_exhausted = True
2834
2835 requested_entries = info_dict.get('requested_entries')
2836 self.is_incomplete = bool(requested_entries)
2837 if self.is_incomplete:
2838 assert self.is_exhausted
2839 self._entries = [self.MissingEntry] * max(requested_entries)
2840 for i, entry in zip(requested_entries, entries):
2841 self._entries[i - 1] = entry
2842 elif isinstance(entries, (list, PagedList, LazyList)):
2843 self._entries = entries
2844 else:
2845 self._entries = LazyList(entries)
2846
2847 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2848 (?P<start>[+-]?\d+)?
2849 (?P<range>[:-]
2850 (?P<end>[+-]?\d+|inf(?:inite)?)?
2851 (?::(?P<step>[+-]?\d+))?
2852 )?''')
2853
2854 @classmethod
2855 def parse_playlist_items(cls, string):
2856 for segment in string.split(','):
2857 if not segment:
2858 raise ValueError('There are two or more consecutive commas')
2859 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2860 if not mobj:
2861 raise ValueError(f'{segment!r} is not a valid specification')
2862 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2863 if int_or_none(step) == 0:
2864 raise ValueError(f'Step in {segment!r} cannot be zero')
2865 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2866
2867 def get_requested_items(self):
2868 playlist_items = self.ydl.params.get('playlist_items')
2869 playlist_start = self.ydl.params.get('playliststart', 1)
2870 playlist_end = self.ydl.params.get('playlistend')
2871 # For backwards compatibility, interpret -1 as whole list
2872 if playlist_end in (-1, None):
2873 playlist_end = ''
2874 if not playlist_items:
2875 playlist_items = f'{playlist_start}:{playlist_end}'
2876 elif playlist_start != 1 or playlist_end:
2877 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2878
2879 for index in self.parse_playlist_items(playlist_items):
2880 for i, entry in self[index]:
2881 yield i, entry
2882 try:
2883 # TODO: Add auto-generated fields
2884 self.ydl._match_entry(entry, incomplete=True, silent=True)
2885 except (ExistingVideoReached, RejectedVideoReached):
2886 return
2887
2888 def get_full_count(self):
2889 if self.is_exhausted and not self.is_incomplete:
2890 return len(self)
2891 elif isinstance(self._entries, InAdvancePagedList):
2892 if self._entries._pagesize == 1:
2893 return self._entries._pagecount
2894
2895 @functools.cached_property
2896 def _getter(self):
2897 if isinstance(self._entries, list):
2898 def get_entry(i):
2899 try:
2900 entry = self._entries[i]
2901 except IndexError:
2902 entry = self.MissingEntry
2903 if not self.is_incomplete:
2904 raise self.IndexError()
2905 if entry is self.MissingEntry:
2906 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2907 return entry
2908 else:
2909 def get_entry(i):
2910 try:
2911 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
2912 except (LazyList.IndexError, PagedList.IndexError):
2913 raise self.IndexError()
2914 return get_entry
2915
2916 def __getitem__(self, idx):
2917 if isinstance(idx, int):
2918 idx = slice(idx, idx)
2919
2920 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2921 step = 1 if idx.step is None else idx.step
2922 if idx.start is None:
2923 start = 0 if step > 0 else len(self) - 1
2924 else:
2925 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
2926
2927 # NB: Do not call len(self) when idx == [:]
2928 if idx.stop is None:
2929 stop = 0 if step < 0 else float('inf')
2930 else:
2931 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
2932 stop += [-1, 1][step > 0]
2933
2934 for i in frange(start, stop, step):
2935 if i < 0:
2936 continue
2937 try:
2938 entry = self._getter(i)
2939 except self.IndexError:
2940 self.is_exhausted = True
2941 if step > 0:
2942 break
2943 continue
2944 yield i + 1, entry
2945
2946 def __len__(self):
2947 return len(tuple(self[:]))
2948
2949 class IndexError(IndexError):
2950 pass
2951
2952
2953 def uppercase_escape(s):
2954 unicode_escape = codecs.getdecoder('unicode_escape')
2955 return re.sub(
2956 r'\\U[0-9a-fA-F]{8}',
2957 lambda m: unicode_escape(m.group(0))[0],
2958 s)
2959
2960
2961 def lowercase_escape(s):
2962 unicode_escape = codecs.getdecoder('unicode_escape')
2963 return re.sub(
2964 r'\\u[0-9a-fA-F]{4}',
2965 lambda m: unicode_escape(m.group(0))[0],
2966 s)
2967
2968
2969 def escape_rfc3986(s):
2970 """Escape non-ASCII characters as suggested by RFC 3986"""
2971 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2972
2973
2974 def escape_url(url):
2975 """Escape URL as suggested by RFC 3986"""
2976 url_parsed = compat_urllib_parse_urlparse(url)
2977 return url_parsed._replace(
2978 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2979 path=escape_rfc3986(url_parsed.path),
2980 params=escape_rfc3986(url_parsed.params),
2981 query=escape_rfc3986(url_parsed.query),
2982 fragment=escape_rfc3986(url_parsed.fragment)
2983 ).geturl()
2984
2985
2986 def parse_qs(url):
2987 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2988
2989
2990 def read_batch_urls(batch_fd):
2991 def fixup(url):
2992 if not isinstance(url, compat_str):
2993 url = url.decode('utf-8', 'replace')
2994 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2995 for bom in BOM_UTF8:
2996 if url.startswith(bom):
2997 url = url[len(bom):]
2998 url = url.lstrip()
2999 if not url or url.startswith(('#', ';', ']')):
3000 return False
3001 # "#" cannot be stripped out since it is part of the URI
3002 # However, it can be safely stripped out if following a whitespace
3003 return re.split(r'\s#', url, 1)[0].rstrip()
3004
3005 with contextlib.closing(batch_fd) as fd:
3006 return [url for url in map(fixup, fd) if url]
3007
3008
3009 def urlencode_postdata(*args, **kargs):
3010 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3011
3012
3013 def update_url_query(url, query):
3014 if not query:
3015 return url
3016 parsed_url = compat_urlparse.urlparse(url)
3017 qs = compat_parse_qs(parsed_url.query)
3018 qs.update(query)
3019 return compat_urlparse.urlunparse(parsed_url._replace(
3020 query=compat_urllib_parse_urlencode(qs, True)))
3021
3022
3023 def update_Request(req, url=None, data=None, headers={}, query={}):
3024 req_headers = req.headers.copy()
3025 req_headers.update(headers)
3026 req_data = data or req.data
3027 req_url = update_url_query(url or req.get_full_url(), query)
3028 req_get_method = req.get_method()
3029 if req_get_method == 'HEAD':
3030 req_type = HEADRequest
3031 elif req_get_method == 'PUT':
3032 req_type = PUTRequest
3033 else:
3034 req_type = compat_urllib_request.Request
3035 new_req = req_type(
3036 req_url, data=req_data, headers=req_headers,
3037 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3038 if hasattr(req, 'timeout'):
3039 new_req.timeout = req.timeout
3040 return new_req
3041
3042
3043 def _multipart_encode_impl(data, boundary):
3044 content_type = 'multipart/form-data; boundary=%s' % boundary
3045
3046 out = b''
3047 for k, v in data.items():
3048 out += b'--' + boundary.encode('ascii') + b'\r\n'
3049 if isinstance(k, compat_str):
3050 k = k.encode()
3051 if isinstance(v, compat_str):
3052 v = v.encode()
3053 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3054 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3055 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3056 if boundary.encode('ascii') in content:
3057 raise ValueError('Boundary overlaps with data')
3058 out += content
3059
3060 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3061
3062 return out, content_type
3063
3064
3065 def multipart_encode(data, boundary=None):
3066 '''
3067 Encode a dict to RFC 7578-compliant form-data
3068
3069 data:
3070 A dict where keys and values can be either Unicode or bytes-like
3071 objects.
3072 boundary:
3073 If specified, it must be a Unicode object and is used as the boundary.
3074 Otherwise a random boundary is generated.
3075
3076 Reference: https://tools.ietf.org/html/rfc7578
3077 '''
3078 has_specified_boundary = boundary is not None
3079
3080 while True:
3081 if boundary is None:
3082 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3083
3084 try:
3085 out, content_type = _multipart_encode_impl(data, boundary)
3086 break
3087 except ValueError:
3088 if has_specified_boundary:
3089 raise
3090 boundary = None
3091
3092 return out, content_type
3093
3094
3095 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3096 for val in map(d.get, variadic(key_or_keys)):
3097 if val is not None and (val or not skip_false_values):
3098 return val
3099 return default
3100
3101
3102 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3103 for f in funcs:
3104 try:
3105 val = f(*args, **kwargs)
3106 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
3107 pass
3108 else:
3109 if expected_type is None or isinstance(val, expected_type):
3110 return val
3111
3112
3113 def try_get(src, getter, expected_type=None):
3114 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3115
3116
3117 def filter_dict(dct, cndn=lambda _, v: v is not None):
3118 return {k: v for k, v in dct.items() if cndn(k, v)}
3119
3120
3121 def merge_dicts(*dicts):
3122 merged = {}
3123 for a_dict in dicts:
3124 for k, v in a_dict.items():
3125 if (v is not None and k not in merged
3126 or isinstance(v, str) and merged[k] == ''):
3127 merged[k] = v
3128 return merged
3129
3130
3131 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3132 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
3133
3134
3135 US_RATINGS = {
3136 'G': 0,
3137 'PG': 10,
3138 'PG-13': 13,
3139 'R': 16,
3140 'NC': 18,
3141 }
3142
3143
3144 TV_PARENTAL_GUIDELINES = {
3145 'TV-Y': 0,
3146 'TV-Y7': 7,
3147 'TV-G': 0,
3148 'TV-PG': 0,
3149 'TV-14': 14,
3150 'TV-MA': 17,
3151 }
3152
3153
3154 def parse_age_limit(s):
3155 # isinstance(False, int) is True. So type() must be used instead
3156 if type(s) is int: # noqa: E721
3157 return s if 0 <= s <= 21 else None
3158 elif not isinstance(s, str):
3159 return None
3160 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3161 if m:
3162 return int(m.group('age'))
3163 s = s.upper()
3164 if s in US_RATINGS:
3165 return US_RATINGS[s]
3166 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3167 if m:
3168 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3169 return None
3170
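# Illustrative examples:
#   parse_age_limit('PG-13') == 13
#   parse_age_limit('TV-MA') == 17
#   parse_age_limit(False) is None  # bools are deliberately not treated as ints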
3171
3172 def strip_jsonp(code):
3173 return re.sub(
3174 r'''(?sx)^
3175 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3176 (?:\s*&&\s*(?P=func_name))?
3177 \s*\(\s*(?P<callback_data>.*)\);?
3178 \s*?(?://[^\n]*)*$''',
3179 r'\g<callback_data>', code)
3180
3181
3182 def js_to_json(code, vars={}):
3183 # vars is a dict of var, val pairs to substitute
3184 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3185 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3186 INTEGER_TABLE = (
3187 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3188 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3189 )
3190
3191 def fix_kv(m):
3192 v = m.group(0)
3193 if v in ('true', 'false', 'null'):
3194 return v
3195 elif v in ('undefined', 'void 0'):
3196 return 'null'
3197 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3198 return ""
3199
3200 if v[0] in ("'", '"'):
3201 v = re.sub(r'(?s)\\.|"', lambda m: {
3202 '"': '\\"',
3203 "\\'": "'",
3204 '\\\n': '',
3205 '\\x': '\\u00',
3206 }.get(m.group(0), m.group(0)), v[1:-1])
3207 else:
3208 for regex, base in INTEGER_TABLE:
3209 im = re.match(regex, v)
3210 if im:
3211 i = int(im.group(1), base)
3212 return '"%d":' % i if v.endswith(':') else '%d' % i
3213
3214 if v in vars:
3215 return vars[v]
3216
3217 return '"%s"' % v
3218
3219 def create_map(mobj):
3220 return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3221
3222 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3223 code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3224
3225 return re.sub(r'''(?sx)
3226 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3227 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3228 {comment}|,(?={skip}[\]}}])|
3229 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3230 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3231 [0-9]+(?={skip}:)|
3232 !+
3233 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3234
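# Illustrative example:
#   js_to_json("{abc: true, 'def': 0x10}") == '{"abc": true, "def": 16}'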
3235
3236 def qualities(quality_ids):
3237 """ Get a numeric quality value out of a list of possible values """
3238 def q(qid):
3239 try:
3240 return quality_ids.index(qid)
3241 except ValueError:
3242 return -1
3243 return q
3244
3245
3246 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3247
3248
3249 DEFAULT_OUTTMPL = {
3250 'default': '%(title)s [%(id)s].%(ext)s',
3251 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3252 }
3253 OUTTMPL_TYPES = {
3254 'chapter': None,
3255 'subtitle': None,
3256 'thumbnail': None,
3257 'description': 'description',
3258 'annotation': 'annotations.xml',
3259 'infojson': 'info.json',
3260 'link': None,
3261 'pl_video': None,
3262 'pl_thumbnail': None,
3263 'pl_description': 'description',
3264 'pl_infojson': 'info.json',
3265 }
3266
3267 # As of [1] format syntax is:
3268 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3269 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3270 STR_FORMAT_RE_TMPL = r'''(?x)
3271 (?<!%)(?P<prefix>(?:%%)*)
3272 %
3273 (?P<has_key>\((?P<key>{0})\))?
3274 (?P<format>
3275 (?P<conversion>[#0\-+ ]+)?
3276 (?P<min_width>\d+)?
3277 (?P<precision>\.\d+)?
3278 (?P<len_mod>[hlL])? # unused in python
3279 {1} # conversion type
3280 )
3281 '''
3282
3283
3284 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3285
3286
3287 def limit_length(s, length):
3288 """ Add ellipses to overly long strings """
3289 if s is None:
3290 return None
3291 ELLIPSES = '...'
3292 if len(s) > length:
3293 return s[:length - len(ELLIPSES)] + ELLIPSES
3294 return s
3295
3296
3297 def version_tuple(v):
3298 return tuple(int(e) for e in re.split(r'[-.]', v))
3299
3300
3301 def is_outdated_version(version, limit, assume_new=True):
3302 if not version:
3303 return not assume_new
3304 try:
3305 return version_tuple(version) < version_tuple(limit)
3306 except ValueError:
3307 return not assume_new
3308
3309
3310 def ytdl_is_updateable():
3311 """ Returns if yt-dlp can be updated with -U """
3312
3313 from .update import is_non_updateable
3314
3315 return not is_non_updateable()
3316
3317
3318 def args_to_str(args):
3319 # Get a short string representation for a subprocess command
3320 return ' '.join(compat_shlex_quote(a) for a in args)
3321
3322
3323 def error_to_compat_str(err):
3324 return str(err)
3325
3326
3327 def error_to_str(err):
3328 return f'{type(err).__name__}: {err}'
3329
3330
3331 def mimetype2ext(mt):
3332 if mt is None:
3333 return None
3334
3335 mt, _, params = mt.partition(';')
3336 mt = mt.strip()
3337
3338 FULL_MAP = {
3339 'audio/mp4': 'm4a',
3340 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3341 # since it's the most popular one
3342 'audio/mpeg': 'mp3',
3343 'audio/x-wav': 'wav',
3344 'audio/wav': 'wav',
3345 'audio/wave': 'wav',
3346 }
3347
3348 ext = FULL_MAP.get(mt)
3349 if ext is not None:
3350 return ext
3351
3352 SUBTYPE_MAP = {
3353 '3gpp': '3gp',
3354 'smptett+xml': 'tt',
3355 'ttaf+xml': 'dfxp',
3356 'ttml+xml': 'ttml',
3357 'x-flv': 'flv',
3358 'x-mp4-fragmented': 'mp4',
3359 'x-ms-sami': 'sami',
3360 'x-ms-wmv': 'wmv',
3361 'mpegurl': 'm3u8',
3362 'x-mpegurl': 'm3u8',
3363 'vnd.apple.mpegurl': 'm3u8',
3364 'dash+xml': 'mpd',
3365 'f4m+xml': 'f4m',
3366 'hds+xml': 'f4m',
3367 'vnd.ms-sstr+xml': 'ism',
3368 'quicktime': 'mov',
3369 'mp2t': 'ts',
3370 'x-wav': 'wav',
3371 'filmstrip+json': 'fs',
3372 'svg+xml': 'svg',
3373 }
3374
3375 _, _, subtype = mt.rpartition('/')
3376 ext = SUBTYPE_MAP.get(subtype.lower())
3377 if ext is not None:
3378 return ext
3379
3380 SUFFIX_MAP = {
3381 'json': 'json',
3382 'xml': 'xml',
3383 'zip': 'zip',
3384 'gzip': 'gz',
3385 }
3386
3387 _, _, suffix = subtype.partition('+')
3388 ext = SUFFIX_MAP.get(suffix)
3389 if ext is not None:
3390 return ext
3391
3392 return subtype.replace('+', '.')
3393
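# Illustrative examples:
#   mimetype2ext('audio/mp4') == 'm4a'
#   mimetype2ext('application/x-mpegURL') == 'm3u8'
#   mimetype2ext('video/mp4; codecs="avc1.42E01E"') == 'mp4'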
3394
3395 def ext2mimetype(ext_or_url):
3396 if not ext_or_url:
3397 return None
3398 if '.' not in ext_or_url:
3399 ext_or_url = f'file.{ext_or_url}'
3400 return mimetypes.guess_type(ext_or_url)[0]
3401
3402
3403 def parse_codecs(codecs_str):
3404 # http://tools.ietf.org/html/rfc6381
3405 if not codecs_str:
3406 return {}
3407 split_codecs = list(filter(None, map(
3408 str.strip, codecs_str.strip().strip(',').split(','))))
3409 vcodec, acodec, scodec, hdr = None, None, None, None
3410 for full_codec in split_codecs:
3411 parts = full_codec.split('.')
3412 codec = parts[0].replace('0', '')
3413 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3414 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3415 if not vcodec:
3416 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3417 if codec in ('dvh1', 'dvhe'):
3418 hdr = 'DV'
3419 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3420 hdr = 'HDR10'
3421 elif full_codec.replace('0', '').startswith('vp9.2'):
3422 hdr = 'HDR10'
3423 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3424 if not acodec:
3425 acodec = full_codec
3426 elif codec in ('stpp', 'wvtt',):
3427 if not scodec:
3428 scodec = full_codec
3429 else:
3430 write_string(f'WARNING: Unknown codec {full_codec}\n')
3431 if vcodec or acodec or scodec:
3432 return {
3433 'vcodec': vcodec or 'none',
3434 'acodec': acodec or 'none',
3435 'dynamic_range': hdr,
3436 **({'scodec': scodec} if scodec is not None else {}),
3437 }
3438 elif len(split_codecs) == 2:
3439 return {
3440 'vcodec': split_codecs[0],
3441 'acodec': split_codecs[1],
3442 }
3443 return {}
3444
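# Illustrative example:
#   parse_codecs('avc1.64001f, mp4a.40.2') == {
#       'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}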
3445
3446 def urlhandle_detect_ext(url_handle):
3447 getheader = url_handle.headers.get
3448
3449 cd = getheader('Content-Disposition')
3450 if cd:
3451 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3452 if m:
3453 e = determine_ext(m.group('filename'), default_ext=None)
3454 if e:
3455 return e
3456
3457 return mimetype2ext(getheader('Content-Type'))
3458
3459
3460 def encode_data_uri(data, mime_type):
3461 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3462
3463
3464 def age_restricted(content_limit, age_limit):
3465 """ Returns True iff the content should be blocked """
3466
3467 if age_limit is None: # No limit set
3468 return False
3469 if content_limit is None:
3470 return False # Content available for everyone
3471 return age_limit < content_limit
3472
3473
3474 def is_html(first_bytes):
3475 """ Detect whether a file contains HTML by examining its first bytes. """
3476
3477 BOMS = [
3478 (b'\xef\xbb\xbf', 'utf-8'),
3479 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3480 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3481 (b'\xff\xfe', 'utf-16-le'),
3482 (b'\xfe\xff', 'utf-16-be'),
3483 ]
3484
3485 encoding = 'utf-8'
3486 for bom, enc in BOMS:
3487 while first_bytes.startswith(bom):
3488 encoding, first_bytes = enc, first_bytes[len(bom):]
3489
3490 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3491
3492
3493 def determine_protocol(info_dict):
3494 protocol = info_dict.get('protocol')
3495 if protocol is not None:
3496 return protocol
3497
3498 url = sanitize_url(info_dict['url'])
3499 if url.startswith('rtmp'):
3500 return 'rtmp'
3501 elif url.startswith('mms'):
3502 return 'mms'
3503 elif url.startswith('rtsp'):
3504 return 'rtsp'
3505
3506 ext = determine_ext(url)
3507 if ext == 'm3u8':
3508 return 'm3u8'
3509 elif ext == 'f4m':
3510 return 'f4m'
3511
3512 return compat_urllib_parse_urlparse(url).scheme
3513
3514
3515 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3516 """ Render a list of rows, each as a list of values.
3517 Text after a \t will be right aligned """
3518 def width(string):
3519 return len(remove_terminal_sequences(string).replace('\t', ''))
3520
3521 def get_max_lens(table):
3522 return [max(width(str(v)) for v in col) for col in zip(*table)]
3523
3524 def filter_using_list(row, filterArray):
3525 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3526
3527 max_lens = get_max_lens(data) if hide_empty else []
3528 header_row = filter_using_list(header_row, max_lens)
3529 data = [filter_using_list(row, max_lens) for row in data]
3530
3531 table = [header_row] + data
3532 max_lens = get_max_lens(table)
3533 extra_gap += 1
3534 if delim:
3535 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3536 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3537 for row in table:
3538 for pos, text in enumerate(map(str, row)):
3539 if '\t' in text:
3540 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3541 else:
3542 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3543 ret = '\n'.join(''.join(row).rstrip() for row in table)
3544 return ret
3545
3546
3547 def _match_one(filter_part, dct, incomplete):
3548 # TODO: Generalize code with YoutubeDL._build_format_filter
3549 STRING_OPERATORS = {
3550 '*=': operator.contains,
3551 '^=': lambda attr, value: attr.startswith(value),
3552 '$=': lambda attr, value: attr.endswith(value),
3553 '~=': lambda attr, value: re.search(value, attr),
3554 }
3555 COMPARISON_OPERATORS = {
3556 **STRING_OPERATORS,
3557 '<=': operator.le, # "<=" must be defined above "<"
3558 '<': operator.lt,
3559 '>=': operator.ge,
3560 '>': operator.gt,
3561 '=': operator.eq,
3562 }
3563
3564 if isinstance(incomplete, bool):
3565 is_incomplete = lambda _: incomplete
3566 else:
3567 is_incomplete = lambda k: k in incomplete
3568
3569 operator_rex = re.compile(r'''(?x)
3570 (?P<key>[a-z_]+)
3571 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3572 (?:
3573 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3574 (?P<strval>.+?)
3575 )
3576 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3577 m = operator_rex.fullmatch(filter_part.strip())
3578 if m:
3579 m = m.groupdict()
3580 unnegated_op = COMPARISON_OPERATORS[m['op']]
3581 if m['negation']:
3582 op = lambda attr, value: not unnegated_op(attr, value)
3583 else:
3584 op = unnegated_op
3585 comparison_value = m['quotedstrval'] or m['strval']
3586 if m['quote']:
3587 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3588 actual_value = dct.get(m['key'])
3589 numeric_comparison = None
3590 if isinstance(actual_value, (int, float)):
3591 # If the original field is a string and the matching comparison value is
3592 # a number, we should respect the origin of the original field
3593 # and process the comparison value as a string (see
3594 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3595 try:
3596 numeric_comparison = int(comparison_value)
3597 except ValueError:
3598 numeric_comparison = parse_filesize(comparison_value)
3599 if numeric_comparison is None:
3600 numeric_comparison = parse_filesize(f'{comparison_value}B')
3601 if numeric_comparison is None:
3602 numeric_comparison = parse_duration(comparison_value)
3603 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3604 raise ValueError('Operator %s only supports string values!' % m['op'])
3605 if actual_value is None:
3606 return is_incomplete(m['key']) or m['none_inclusive']
3607 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3608
3609 UNARY_OPERATORS = {
3610 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3611 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3612 }
3613 operator_rex = re.compile(r'''(?x)
3614 (?P<op>%s)\s*(?P<key>[a-z_]+)
3615 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3616 m = operator_rex.fullmatch(filter_part.strip())
3617 if m:
3618 op = UNARY_OPERATORS[m.group('op')]
3619 actual_value = dct.get(m.group('key'))
3620 if is_incomplete(m.group('key')) and actual_value is None:
3621 return True
3622 return op(actual_value)
3623
3624 raise ValueError('Invalid filter part %r' % filter_part)
3625
3626
3627 def match_str(filter_str, dct, incomplete=False):
3628 """ Filter a dictionary with a simple string syntax.
3629 @returns Whether the filter passes
3630 @param incomplete Set of keys that are expected to be missing from dct.
3631 Can be True/False to indicate that all/none of the keys may be missing.
3632 All conditions on incomplete keys pass if the key is missing.
3633 """
3634 return all(
3635 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3636 for filter_part in re.split(r'(?<!\\)&', filter_str))
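
# Illustrative usage sketch (hand-computed, untested) of the filter syntax
# consumed by match_str()/_match_one(): '&' separates conditions, '!' negates
# an operator and a bare key tests for presence.
#   >>> match_str('like_count > 100 & description', {'like_count': 190, 'description': 'foo'})
#   True
#   >>> match_str('like_count > 100', {'like_count': 50})
#   False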
3637
3638
3639 def match_filter_func(filters):
3640 if not filters:
3641 return None
3642 filters = set(variadic(filters))
3643
3644 interactive = '-' in filters
3645 if interactive:
3646 filters.remove('-')
3647
3648 def _match_func(info_dict, incomplete=False):
3649 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3650 return NO_DEFAULT if interactive and not incomplete else None
3651 else:
3652 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3653 filter_str = ') | ('.join(map(str.strip, filters))
3654 return f'{video_title} does not pass filter ({filter_str}), skipping ...'
3655 return _match_func
3656
3657
3658 def download_range_func(chapters, ranges):
3659 def inner(info_dict, ydl):
3660 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3661 else 'Cannot match chapters since chapter information is unavailable')
3662 for regex in chapters or []:
3663 for i, chapter in enumerate(info_dict.get('chapters') or []):
3664 if re.search(regex, chapter['title']):
3665 warning = None
3666 yield {**chapter, 'index': i}
3667 if chapters and warning:
3668 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3669
3670 yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
3671
3672 return inner
3673
3674
3675 def parse_dfxp_time_expr(time_expr):
3676 if not time_expr:
3677 return
3678
3679 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3680 if mobj:
3681 return float(mobj.group('time_offset'))
3682
3683 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3684 if mobj:
3685 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
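
# Illustrative usage sketch (hand-computed, untested): both bare offsets and
# clock values are accepted, and ':' as a fraction separator is tolerated.
#   >>> parse_dfxp_time_expr('5.1s')
#   5.1
#   >>> parse_dfxp_time_expr('00:03:15.3')
#   195.3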
3686
3687
3688 def srt_subtitles_timecode(seconds):
3689 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3690
3691
3692 def ass_subtitles_timecode(seconds):
3693 time = timetuple_from_msec(seconds * 1000)
3694 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
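
# Illustrative usage sketch (hand-computed, untested): SRT uses a comma and
# millisecond precision, ASS uses a dot and centisecond precision.
#   >>> srt_subtitles_timecode(61.5)
#   '00:01:01,500'
#   >>> ass_subtitles_timecode(61.5)
#   '0:01:01.50'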
3695
3696
3697 def dfxp2srt(dfxp_data):
3698 '''
3699 @param dfxp_data A bytes-like object containing DFXP data
3700 @returns A unicode object containing converted SRT data
3701 '''
3702 LEGACY_NAMESPACES = (
3703 (b'http://www.w3.org/ns/ttml', [
3704 b'http://www.w3.org/2004/11/ttaf1',
3705 b'http://www.w3.org/2006/04/ttaf1',
3706 b'http://www.w3.org/2006/10/ttaf1',
3707 ]),
3708 (b'http://www.w3.org/ns/ttml#styling', [
3709 b'http://www.w3.org/ns/ttml#style',
3710 ]),
3711 )
3712
3713 SUPPORTED_STYLING = [
3714 'color',
3715 'fontFamily',
3716 'fontSize',
3717 'fontStyle',
3718 'fontWeight',
3719 'textDecoration'
3720 ]
3721
3722 _x = functools.partial(xpath_with_ns, ns_map={
3723 'xml': 'http://www.w3.org/XML/1998/namespace',
3724 'ttml': 'http://www.w3.org/ns/ttml',
3725 'tts': 'http://www.w3.org/ns/ttml#styling',
3726 })
3727
3728 styles = {}
3729 default_style = {}
3730
3731 class TTMLPElementParser:
3732 _out = ''
3733 _unclosed_elements = []
3734 _applied_styles = []
3735
3736 def start(self, tag, attrib):
3737 if tag in (_x('ttml:br'), 'br'):
3738 self._out += '\n'
3739 else:
3740 unclosed_elements = []
3741 style = {}
3742 element_style_id = attrib.get('style')
3743 if default_style:
3744 style.update(default_style)
3745 if element_style_id:
3746 style.update(styles.get(element_style_id, {}))
3747 for prop in SUPPORTED_STYLING:
3748 prop_val = attrib.get(_x('tts:' + prop))
3749 if prop_val:
3750 style[prop] = prop_val
3751 if style:
3752 font = ''
3753 for k, v in sorted(style.items()):
3754 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3755 continue
3756 if k == 'color':
3757 font += ' color="%s"' % v
3758 elif k == 'fontSize':
3759 font += ' size="%s"' % v
3760 elif k == 'fontFamily':
3761 font += ' face="%s"' % v
3762 elif k == 'fontWeight' and v == 'bold':
3763 self._out += '<b>'
3764 unclosed_elements.append('b')
3765 elif k == 'fontStyle' and v == 'italic':
3766 self._out += '<i>'
3767 unclosed_elements.append('i')
3768 elif k == 'textDecoration' and v == 'underline':
3769 self._out += '<u>'
3770 unclosed_elements.append('u')
3771 if font:
3772 self._out += '<font' + font + '>'
3773 unclosed_elements.append('font')
3774 applied_style = {}
3775 if self._applied_styles:
3776 applied_style.update(self._applied_styles[-1])
3777 applied_style.update(style)
3778 self._applied_styles.append(applied_style)
3779 self._unclosed_elements.append(unclosed_elements)
3780
3781 def end(self, tag):
3782 if tag not in (_x('ttml:br'), 'br'):
3783 unclosed_elements = self._unclosed_elements.pop()
3784 for element in reversed(unclosed_elements):
3785 self._out += '</%s>' % element
3786 if unclosed_elements and self._applied_styles:
3787 self._applied_styles.pop()
3788
3789 def data(self, data):
3790 self._out += data
3791
3792 def close(self):
3793 return self._out.strip()
3794
3795 def parse_node(node):
3796 target = TTMLPElementParser()
3797 parser = xml.etree.ElementTree.XMLParser(target=target)
3798 parser.feed(xml.etree.ElementTree.tostring(node))
3799 return parser.close()
3800
3801 for k, v in LEGACY_NAMESPACES:
3802 for ns in v:
3803 dfxp_data = dfxp_data.replace(ns, k)
3804
3805 dfxp = compat_etree_fromstring(dfxp_data)
3806 out = []
3807 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3808
3809 if not paras:
3810 raise ValueError('Invalid dfxp/TTML subtitle')
3811
3812 repeat = False
3813 while True:
3814 for style in dfxp.findall(_x('.//ttml:style')):
3815 style_id = style.get('id') or style.get(_x('xml:id'))
3816 if not style_id:
3817 continue
3818 parent_style_id = style.get('style')
3819 if parent_style_id:
3820 if parent_style_id not in styles:
3821 repeat = True
3822 continue
3823 styles[style_id] = styles[parent_style_id].copy()
3824 for prop in SUPPORTED_STYLING:
3825 prop_val = style.get(_x('tts:' + prop))
3826 if prop_val:
3827 styles.setdefault(style_id, {})[prop] = prop_val
3828 if repeat:
3829 repeat = False
3830 else:
3831 break
3832
3833 for p in ('body', 'div'):
3834 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3835 if ele is None:
3836 continue
3837 style = styles.get(ele.get('style'))
3838 if not style:
3839 continue
3840 default_style.update(style)
3841
3842 for para, index in zip(paras, itertools.count(1)):
3843 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3844 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3845 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3846 if begin_time is None:
3847 continue
3848 if not end_time:
3849 if not dur:
3850 continue
3851 end_time = begin_time + dur
3852 out.append('%d\n%s --> %s\n%s\n\n' % (
3853 index,
3854 srt_subtitles_timecode(begin_time),
3855 srt_subtitles_timecode(end_time),
3856 parse_node(para)))
3857
3858 return ''.join(out)
3859
3860
3861 def cli_option(params, command_option, param, separator=None):
3862 param = params.get(param)
3863 return ([] if param is None
3864 else [command_option, str(param)] if separator is None
3865 else [f'{command_option}{separator}{param}'])
3866
3867
3868 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3869 param = params.get(param)
3870 assert param in (True, False, None)
3871 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3872
3873
3874 def cli_valueless_option(params, command_option, param, expected_value=True):
3875 return [command_option] if params.get(param) == expected_value else []
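
# Illustrative usage sketch (hand-computed, untested) for the cli_* helpers,
# which translate (params, option) pairs into external-command arguments.
# Note that cli_bool_option looks the boolean up in a {True: ..., False: ...}
# dict via cli_option.
#   >>> cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', '127.0.0.1:3128']
#   >>> cli_bool_option({'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true')
#   ['--check-certificate', 'false']
#   >>> cli_valueless_option({'downloadarchive': True}, '--no-download-archive', 'downloadarchive', False)
#   []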
3876
3877
3878 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3879 if isinstance(argdict, (list, tuple)): # for backward compatibility
3880 if use_compat:
3881 return argdict
3882 else:
3883 argdict = None
3884 if argdict is None:
3885 return default
3886 assert isinstance(argdict, dict)
3887
3888 assert isinstance(keys, (list, tuple))
3889 for key_list in keys:
3890 arg_list = list(filter(
3891 lambda x: x is not None,
3892 [argdict.get(key.lower()) for key in variadic(key_list)]))
3893 if arg_list:
3894 return [arg for args in arg_list for arg in args]
3895 return default
3896
3897
3898 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3899 main_key, exe = main_key.lower(), exe.lower()
3900 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3901 keys = [f'{root_key}{k}' for k in (keys or [''])]
3902 if root_key in keys:
3903 if main_key != exe:
3904 keys.append((main_key, exe))
3905 keys.append('default')
3906 else:
3907 use_compat = False
3908 return cli_configuration_args(argdict, keys, default, use_compat)
3909
3910
3911 class ISO639Utils:
3912 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3913 _lang_map = {
3914 'aa': 'aar',
3915 'ab': 'abk',
3916 'ae': 'ave',
3917 'af': 'afr',
3918 'ak': 'aka',
3919 'am': 'amh',
3920 'an': 'arg',
3921 'ar': 'ara',
3922 'as': 'asm',
3923 'av': 'ava',
3924 'ay': 'aym',
3925 'az': 'aze',
3926 'ba': 'bak',
3927 'be': 'bel',
3928 'bg': 'bul',
3929 'bh': 'bih',
3930 'bi': 'bis',
3931 'bm': 'bam',
3932 'bn': 'ben',
3933 'bo': 'bod',
3934 'br': 'bre',
3935 'bs': 'bos',
3936 'ca': 'cat',
3937 'ce': 'che',
3938 'ch': 'cha',
3939 'co': 'cos',
3940 'cr': 'cre',
3941 'cs': 'ces',
3942 'cu': 'chu',
3943 'cv': 'chv',
3944 'cy': 'cym',
3945 'da': 'dan',
3946 'de': 'deu',
3947 'dv': 'div',
3948 'dz': 'dzo',
3949 'ee': 'ewe',
3950 'el': 'ell',
3951 'en': 'eng',
3952 'eo': 'epo',
3953 'es': 'spa',
3954 'et': 'est',
3955 'eu': 'eus',
3956 'fa': 'fas',
3957 'ff': 'ful',
3958 'fi': 'fin',
3959 'fj': 'fij',
3960 'fo': 'fao',
3961 'fr': 'fra',
3962 'fy': 'fry',
3963 'ga': 'gle',
3964 'gd': 'gla',
3965 'gl': 'glg',
3966 'gn': 'grn',
3967 'gu': 'guj',
3968 'gv': 'glv',
3969 'ha': 'hau',
3970 'he': 'heb',
3971 'iw': 'heb', # Replaced by he in 1989 revision
3972 'hi': 'hin',
3973 'ho': 'hmo',
3974 'hr': 'hrv',
3975 'ht': 'hat',
3976 'hu': 'hun',
3977 'hy': 'hye',
3978 'hz': 'her',
3979 'ia': 'ina',
3980 'id': 'ind',
3981 'in': 'ind', # Replaced by id in 1989 revision
3982 'ie': 'ile',
3983 'ig': 'ibo',
3984 'ii': 'iii',
3985 'ik': 'ipk',
3986 'io': 'ido',
3987 'is': 'isl',
3988 'it': 'ita',
3989 'iu': 'iku',
3990 'ja': 'jpn',
3991 'jv': 'jav',
3992 'ka': 'kat',
3993 'kg': 'kon',
3994 'ki': 'kik',
3995 'kj': 'kua',
3996 'kk': 'kaz',
3997 'kl': 'kal',
3998 'km': 'khm',
3999 'kn': 'kan',
4000 'ko': 'kor',
4001 'kr': 'kau',
4002 'ks': 'kas',
4003 'ku': 'kur',
4004 'kv': 'kom',
4005 'kw': 'cor',
4006 'ky': 'kir',
4007 'la': 'lat',
4008 'lb': 'ltz',
4009 'lg': 'lug',
4010 'li': 'lim',
4011 'ln': 'lin',
4012 'lo': 'lao',
4013 'lt': 'lit',
4014 'lu': 'lub',
4015 'lv': 'lav',
4016 'mg': 'mlg',
4017 'mh': 'mah',
4018 'mi': 'mri',
4019 'mk': 'mkd',
4020 'ml': 'mal',
4021 'mn': 'mon',
4022 'mr': 'mar',
4023 'ms': 'msa',
4024 'mt': 'mlt',
4025 'my': 'mya',
4026 'na': 'nau',
4027 'nb': 'nob',
4028 'nd': 'nde',
4029 'ne': 'nep',
4030 'ng': 'ndo',
4031 'nl': 'nld',
4032 'nn': 'nno',
4033 'no': 'nor',
4034 'nr': 'nbl',
4035 'nv': 'nav',
4036 'ny': 'nya',
4037 'oc': 'oci',
4038 'oj': 'oji',
4039 'om': 'orm',
4040 'or': 'ori',
4041 'os': 'oss',
4042 'pa': 'pan',
4043 'pi': 'pli',
4044 'pl': 'pol',
4045 'ps': 'pus',
4046 'pt': 'por',
4047 'qu': 'que',
4048 'rm': 'roh',
4049 'rn': 'run',
4050 'ro': 'ron',
4051 'ru': 'rus',
4052 'rw': 'kin',
4053 'sa': 'san',
4054 'sc': 'srd',
4055 'sd': 'snd',
4056 'se': 'sme',
4057 'sg': 'sag',
4058 'si': 'sin',
4059 'sk': 'slk',
4060 'sl': 'slv',
4061 'sm': 'smo',
4062 'sn': 'sna',
4063 'so': 'som',
4064 'sq': 'sqi',
4065 'sr': 'srp',
4066 'ss': 'ssw',
4067 'st': 'sot',
4068 'su': 'sun',
4069 'sv': 'swe',
4070 'sw': 'swa',
4071 'ta': 'tam',
4072 'te': 'tel',
4073 'tg': 'tgk',
4074 'th': 'tha',
4075 'ti': 'tir',
4076 'tk': 'tuk',
4077 'tl': 'tgl',
4078 'tn': 'tsn',
4079 'to': 'ton',
4080 'tr': 'tur',
4081 'ts': 'tso',
4082 'tt': 'tat',
4083 'tw': 'twi',
4084 'ty': 'tah',
4085 'ug': 'uig',
4086 'uk': 'ukr',
4087 'ur': 'urd',
4088 'uz': 'uzb',
4089 've': 'ven',
4090 'vi': 'vie',
4091 'vo': 'vol',
4092 'wa': 'wln',
4093 'wo': 'wol',
4094 'xh': 'xho',
4095 'yi': 'yid',
4096 'ji': 'yid', # Replaced by yi in 1989 revision
4097 'yo': 'yor',
4098 'za': 'zha',
4099 'zh': 'zho',
4100 'zu': 'zul',
4101 }
4102
4103 @classmethod
4104 def short2long(cls, code):
4105 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4106 return cls._lang_map.get(code[:2])
4107
4108 @classmethod
4109 def long2short(cls, code):
4110 """Convert language code from ISO 639-2/T to ISO 639-1"""
4111 for short_name, long_name in cls._lang_map.items():
4112 if long_name == code:
4113 return short_name
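
# Illustrative usage sketch (hand-computed, untested):
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'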
4114
4115
4116 class ISO3166Utils:
4117 # From http://data.okfn.org/data/core/country-list
4118 _country_map = {
4119 'AF': 'Afghanistan',
4120 'AX': 'Åland Islands',
4121 'AL': 'Albania',
4122 'DZ': 'Algeria',
4123 'AS': 'American Samoa',
4124 'AD': 'Andorra',
4125 'AO': 'Angola',
4126 'AI': 'Anguilla',
4127 'AQ': 'Antarctica',
4128 'AG': 'Antigua and Barbuda',
4129 'AR': 'Argentina',
4130 'AM': 'Armenia',
4131 'AW': 'Aruba',
4132 'AU': 'Australia',
4133 'AT': 'Austria',
4134 'AZ': 'Azerbaijan',
4135 'BS': 'Bahamas',
4136 'BH': 'Bahrain',
4137 'BD': 'Bangladesh',
4138 'BB': 'Barbados',
4139 'BY': 'Belarus',
4140 'BE': 'Belgium',
4141 'BZ': 'Belize',
4142 'BJ': 'Benin',
4143 'BM': 'Bermuda',
4144 'BT': 'Bhutan',
4145 'BO': 'Bolivia, Plurinational State of',
4146 'BQ': 'Bonaire, Sint Eustatius and Saba',
4147 'BA': 'Bosnia and Herzegovina',
4148 'BW': 'Botswana',
4149 'BV': 'Bouvet Island',
4150 'BR': 'Brazil',
4151 'IO': 'British Indian Ocean Territory',
4152 'BN': 'Brunei Darussalam',
4153 'BG': 'Bulgaria',
4154 'BF': 'Burkina Faso',
4155 'BI': 'Burundi',
4156 'KH': 'Cambodia',
4157 'CM': 'Cameroon',
4158 'CA': 'Canada',
4159 'CV': 'Cape Verde',
4160 'KY': 'Cayman Islands',
4161 'CF': 'Central African Republic',
4162 'TD': 'Chad',
4163 'CL': 'Chile',
4164 'CN': 'China',
4165 'CX': 'Christmas Island',
4166 'CC': 'Cocos (Keeling) Islands',
4167 'CO': 'Colombia',
4168 'KM': 'Comoros',
4169 'CG': 'Congo',
4170 'CD': 'Congo, the Democratic Republic of the',
4171 'CK': 'Cook Islands',
4172 'CR': 'Costa Rica',
4173 'CI': 'Côte d\'Ivoire',
4174 'HR': 'Croatia',
4175 'CU': 'Cuba',
4176 'CW': 'Curaçao',
4177 'CY': 'Cyprus',
4178 'CZ': 'Czech Republic',
4179 'DK': 'Denmark',
4180 'DJ': 'Djibouti',
4181 'DM': 'Dominica',
4182 'DO': 'Dominican Republic',
4183 'EC': 'Ecuador',
4184 'EG': 'Egypt',
4185 'SV': 'El Salvador',
4186 'GQ': 'Equatorial Guinea',
4187 'ER': 'Eritrea',
4188 'EE': 'Estonia',
4189 'ET': 'Ethiopia',
4190 'FK': 'Falkland Islands (Malvinas)',
4191 'FO': 'Faroe Islands',
4192 'FJ': 'Fiji',
4193 'FI': 'Finland',
4194 'FR': 'France',
4195 'GF': 'French Guiana',
4196 'PF': 'French Polynesia',
4197 'TF': 'French Southern Territories',
4198 'GA': 'Gabon',
4199 'GM': 'Gambia',
4200 'GE': 'Georgia',
4201 'DE': 'Germany',
4202 'GH': 'Ghana',
4203 'GI': 'Gibraltar',
4204 'GR': 'Greece',
4205 'GL': 'Greenland',
4206 'GD': 'Grenada',
4207 'GP': 'Guadeloupe',
4208 'GU': 'Guam',
4209 'GT': 'Guatemala',
4210 'GG': 'Guernsey',
4211 'GN': 'Guinea',
4212 'GW': 'Guinea-Bissau',
4213 'GY': 'Guyana',
4214 'HT': 'Haiti',
4215 'HM': 'Heard Island and McDonald Islands',
4216 'VA': 'Holy See (Vatican City State)',
4217 'HN': 'Honduras',
4218 'HK': 'Hong Kong',
4219 'HU': 'Hungary',
4220 'IS': 'Iceland',
4221 'IN': 'India',
4222 'ID': 'Indonesia',
4223 'IR': 'Iran, Islamic Republic of',
4224 'IQ': 'Iraq',
4225 'IE': 'Ireland',
4226 'IM': 'Isle of Man',
4227 'IL': 'Israel',
4228 'IT': 'Italy',
4229 'JM': 'Jamaica',
4230 'JP': 'Japan',
4231 'JE': 'Jersey',
4232 'JO': 'Jordan',
4233 'KZ': 'Kazakhstan',
4234 'KE': 'Kenya',
4235 'KI': 'Kiribati',
4236 'KP': 'Korea, Democratic People\'s Republic of',
4237 'KR': 'Korea, Republic of',
4238 'KW': 'Kuwait',
4239 'KG': 'Kyrgyzstan',
4240 'LA': 'Lao People\'s Democratic Republic',
4241 'LV': 'Latvia',
4242 'LB': 'Lebanon',
4243 'LS': 'Lesotho',
4244 'LR': 'Liberia',
4245 'LY': 'Libya',
4246 'LI': 'Liechtenstein',
4247 'LT': 'Lithuania',
4248 'LU': 'Luxembourg',
4249 'MO': 'Macao',
4250 'MK': 'Macedonia, the Former Yugoslav Republic of',
4251 'MG': 'Madagascar',
4252 'MW': 'Malawi',
4253 'MY': 'Malaysia',
4254 'MV': 'Maldives',
4255 'ML': 'Mali',
4256 'MT': 'Malta',
4257 'MH': 'Marshall Islands',
4258 'MQ': 'Martinique',
4259 'MR': 'Mauritania',
4260 'MU': 'Mauritius',
4261 'YT': 'Mayotte',
4262 'MX': 'Mexico',
4263 'FM': 'Micronesia, Federated States of',
4264 'MD': 'Moldova, Republic of',
4265 'MC': 'Monaco',
4266 'MN': 'Mongolia',
4267 'ME': 'Montenegro',
4268 'MS': 'Montserrat',
4269 'MA': 'Morocco',
4270 'MZ': 'Mozambique',
4271 'MM': 'Myanmar',
4272 'NA': 'Namibia',
4273 'NR': 'Nauru',
4274 'NP': 'Nepal',
4275 'NL': 'Netherlands',
4276 'NC': 'New Caledonia',
4277 'NZ': 'New Zealand',
4278 'NI': 'Nicaragua',
4279 'NE': 'Niger',
4280 'NG': 'Nigeria',
4281 'NU': 'Niue',
4282 'NF': 'Norfolk Island',
4283 'MP': 'Northern Mariana Islands',
4284 'NO': 'Norway',
4285 'OM': 'Oman',
4286 'PK': 'Pakistan',
4287 'PW': 'Palau',
4288 'PS': 'Palestine, State of',
4289 'PA': 'Panama',
4290 'PG': 'Papua New Guinea',
4291 'PY': 'Paraguay',
4292 'PE': 'Peru',
4293 'PH': 'Philippines',
4294 'PN': 'Pitcairn',
4295 'PL': 'Poland',
4296 'PT': 'Portugal',
4297 'PR': 'Puerto Rico',
4298 'QA': 'Qatar',
4299 'RE': 'Réunion',
4300 'RO': 'Romania',
4301 'RU': 'Russian Federation',
4302 'RW': 'Rwanda',
4303 'BL': 'Saint Barthélemy',
4304 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4305 'KN': 'Saint Kitts and Nevis',
4306 'LC': 'Saint Lucia',
4307 'MF': 'Saint Martin (French part)',
4308 'PM': 'Saint Pierre and Miquelon',
4309 'VC': 'Saint Vincent and the Grenadines',
4310 'WS': 'Samoa',
4311 'SM': 'San Marino',
4312 'ST': 'Sao Tome and Principe',
4313 'SA': 'Saudi Arabia',
4314 'SN': 'Senegal',
4315 'RS': 'Serbia',
4316 'SC': 'Seychelles',
4317 'SL': 'Sierra Leone',
4318 'SG': 'Singapore',
4319 'SX': 'Sint Maarten (Dutch part)',
4320 'SK': 'Slovakia',
4321 'SI': 'Slovenia',
4322 'SB': 'Solomon Islands',
4323 'SO': 'Somalia',
4324 'ZA': 'South Africa',
4325 'GS': 'South Georgia and the South Sandwich Islands',
4326 'SS': 'South Sudan',
4327 'ES': 'Spain',
4328 'LK': 'Sri Lanka',
4329 'SD': 'Sudan',
4330 'SR': 'Suriname',
4331 'SJ': 'Svalbard and Jan Mayen',
4332 'SZ': 'Swaziland',
4333 'SE': 'Sweden',
4334 'CH': 'Switzerland',
4335 'SY': 'Syrian Arab Republic',
4336 'TW': 'Taiwan, Province of China',
4337 'TJ': 'Tajikistan',
4338 'TZ': 'Tanzania, United Republic of',
4339 'TH': 'Thailand',
4340 'TL': 'Timor-Leste',
4341 'TG': 'Togo',
4342 'TK': 'Tokelau',
4343 'TO': 'Tonga',
4344 'TT': 'Trinidad and Tobago',
4345 'TN': 'Tunisia',
4346 'TR': 'Turkey',
4347 'TM': 'Turkmenistan',
4348 'TC': 'Turks and Caicos Islands',
4349 'TV': 'Tuvalu',
4350 'UG': 'Uganda',
4351 'UA': 'Ukraine',
4352 'AE': 'United Arab Emirates',
4353 'GB': 'United Kingdom',
4354 'US': 'United States',
4355 'UM': 'United States Minor Outlying Islands',
4356 'UY': 'Uruguay',
4357 'UZ': 'Uzbekistan',
4358 'VU': 'Vanuatu',
4359 'VE': 'Venezuela, Bolivarian Republic of',
4360 'VN': 'Viet Nam',
4361 'VG': 'Virgin Islands, British',
4362 'VI': 'Virgin Islands, U.S.',
4363 'WF': 'Wallis and Futuna',
4364 'EH': 'Western Sahara',
4365 'YE': 'Yemen',
4366 'ZM': 'Zambia',
4367 'ZW': 'Zimbabwe',
4368 # Not ISO 3166 codes, but used for IP blocks
4369 'AP': 'Asia/Pacific Region',
4370 'EU': 'Europe',
4371 }
4372
4373 @classmethod
4374 def short2full(cls, code):
4375 """Convert an ISO 3166-2 country code to the corresponding full name"""
4376 return cls._country_map.get(code.upper())
4377
4378
4379 class GeoUtils:
4380 # Major IPv4 address blocks per country
4381 _country_ip_map = {
4382 'AD': '46.172.224.0/19',
4383 'AE': '94.200.0.0/13',
4384 'AF': '149.54.0.0/17',
4385 'AG': '209.59.64.0/18',
4386 'AI': '204.14.248.0/21',
4387 'AL': '46.99.0.0/16',
4388 'AM': '46.70.0.0/15',
4389 'AO': '105.168.0.0/13',
4390 'AP': '182.50.184.0/21',
4391 'AQ': '23.154.160.0/24',
4392 'AR': '181.0.0.0/12',
4393 'AS': '202.70.112.0/20',
4394 'AT': '77.116.0.0/14',
4395 'AU': '1.128.0.0/11',
4396 'AW': '181.41.0.0/18',
4397 'AX': '185.217.4.0/22',
4398 'AZ': '5.197.0.0/16',
4399 'BA': '31.176.128.0/17',
4400 'BB': '65.48.128.0/17',
4401 'BD': '114.130.0.0/16',
4402 'BE': '57.0.0.0/8',
4403 'BF': '102.178.0.0/15',
4404 'BG': '95.42.0.0/15',
4405 'BH': '37.131.0.0/17',
4406 'BI': '154.117.192.0/18',
4407 'BJ': '137.255.0.0/16',
4408 'BL': '185.212.72.0/23',
4409 'BM': '196.12.64.0/18',
4410 'BN': '156.31.0.0/16',
4411 'BO': '161.56.0.0/16',
4412 'BQ': '161.0.80.0/20',
4413 'BR': '191.128.0.0/12',
4414 'BS': '24.51.64.0/18',
4415 'BT': '119.2.96.0/19',
4416 'BW': '168.167.0.0/16',
4417 'BY': '178.120.0.0/13',
4418 'BZ': '179.42.192.0/18',
4419 'CA': '99.224.0.0/11',
4420 'CD': '41.243.0.0/16',
4421 'CF': '197.242.176.0/21',
4422 'CG': '160.113.0.0/16',
4423 'CH': '85.0.0.0/13',
4424 'CI': '102.136.0.0/14',
4425 'CK': '202.65.32.0/19',
4426 'CL': '152.172.0.0/14',
4427 'CM': '102.244.0.0/14',
4428 'CN': '36.128.0.0/10',
4429 'CO': '181.240.0.0/12',
4430 'CR': '201.192.0.0/12',
4431 'CU': '152.206.0.0/15',
4432 'CV': '165.90.96.0/19',
4433 'CW': '190.88.128.0/17',
4434 'CY': '31.153.0.0/16',
4435 'CZ': '88.100.0.0/14',
4436 'DE': '53.0.0.0/8',
4437 'DJ': '197.241.0.0/17',
4438 'DK': '87.48.0.0/12',
4439 'DM': '192.243.48.0/20',
4440 'DO': '152.166.0.0/15',
4441 'DZ': '41.96.0.0/12',
4442 'EC': '186.68.0.0/15',
4443 'EE': '90.190.0.0/15',
4444 'EG': '156.160.0.0/11',
4445 'ER': '196.200.96.0/20',
4446 'ES': '88.0.0.0/11',
4447 'ET': '196.188.0.0/14',
4448 'EU': '2.16.0.0/13',
4449 'FI': '91.152.0.0/13',
4450 'FJ': '144.120.0.0/16',
4451 'FK': '80.73.208.0/21',
4452 'FM': '119.252.112.0/20',
4453 'FO': '88.85.32.0/19',
4454 'FR': '90.0.0.0/9',
4455 'GA': '41.158.0.0/15',
4456 'GB': '25.0.0.0/8',
4457 'GD': '74.122.88.0/21',
4458 'GE': '31.146.0.0/16',
4459 'GF': '161.22.64.0/18',
4460 'GG': '62.68.160.0/19',
4461 'GH': '154.160.0.0/12',
4462 'GI': '95.164.0.0/16',
4463 'GL': '88.83.0.0/19',
4464 'GM': '160.182.0.0/15',
4465 'GN': '197.149.192.0/18',
4466 'GP': '104.250.0.0/19',
4467 'GQ': '105.235.224.0/20',
4468 'GR': '94.64.0.0/13',
4469 'GT': '168.234.0.0/16',
4470 'GU': '168.123.0.0/16',
4471 'GW': '197.214.80.0/20',
4472 'GY': '181.41.64.0/18',
4473 'HK': '113.252.0.0/14',
4474 'HN': '181.210.0.0/16',
4475 'HR': '93.136.0.0/13',
4476 'HT': '148.102.128.0/17',
4477 'HU': '84.0.0.0/14',
4478 'ID': '39.192.0.0/10',
4479 'IE': '87.32.0.0/12',
4480 'IL': '79.176.0.0/13',
4481 'IM': '5.62.80.0/20',
4482 'IN': '117.192.0.0/10',
4483 'IO': '203.83.48.0/21',
4484 'IQ': '37.236.0.0/14',
4485 'IR': '2.176.0.0/12',
4486 'IS': '82.221.0.0/16',
4487 'IT': '79.0.0.0/10',
4488 'JE': '87.244.64.0/18',
4489 'JM': '72.27.0.0/17',
4490 'JO': '176.29.0.0/16',
4491 'JP': '133.0.0.0/8',
4492 'KE': '105.48.0.0/12',
4493 'KG': '158.181.128.0/17',
4494 'KH': '36.37.128.0/17',
4495 'KI': '103.25.140.0/22',
4496 'KM': '197.255.224.0/20',
4497 'KN': '198.167.192.0/19',
4498 'KP': '175.45.176.0/22',
4499 'KR': '175.192.0.0/10',
4500 'KW': '37.36.0.0/14',
4501 'KY': '64.96.0.0/15',
4502 'KZ': '2.72.0.0/13',
4503 'LA': '115.84.64.0/18',
4504 'LB': '178.135.0.0/16',
4505 'LC': '24.92.144.0/20',
4506 'LI': '82.117.0.0/19',
4507 'LK': '112.134.0.0/15',
4508 'LR': '102.183.0.0/16',
4509 'LS': '129.232.0.0/17',
4510 'LT': '78.56.0.0/13',
4511 'LU': '188.42.0.0/16',
4512 'LV': '46.109.0.0/16',
4513 'LY': '41.252.0.0/14',
4514 'MA': '105.128.0.0/11',
4515 'MC': '88.209.64.0/18',
4516 'MD': '37.246.0.0/16',
4517 'ME': '178.175.0.0/17',
4518 'MF': '74.112.232.0/21',
4519 'MG': '154.126.0.0/17',
4520 'MH': '117.103.88.0/21',
4521 'MK': '77.28.0.0/15',
4522 'ML': '154.118.128.0/18',
4523 'MM': '37.111.0.0/17',
4524 'MN': '49.0.128.0/17',
4525 'MO': '60.246.0.0/16',
4526 'MP': '202.88.64.0/20',
4527 'MQ': '109.203.224.0/19',
4528 'MR': '41.188.64.0/18',
4529 'MS': '208.90.112.0/22',
4530 'MT': '46.11.0.0/16',
4531 'MU': '105.16.0.0/12',
4532 'MV': '27.114.128.0/18',
4533 'MW': '102.70.0.0/15',
4534 'MX': '187.192.0.0/11',
4535 'MY': '175.136.0.0/13',
4536 'MZ': '197.218.0.0/15',
4537 'NA': '41.182.0.0/16',
4538 'NC': '101.101.0.0/18',
4539 'NE': '197.214.0.0/18',
4540 'NF': '203.17.240.0/22',
4541 'NG': '105.112.0.0/12',
4542 'NI': '186.76.0.0/15',
4543 'NL': '145.96.0.0/11',
4544 'NO': '84.208.0.0/13',
4545 'NP': '36.252.0.0/15',
4546 'NR': '203.98.224.0/19',
4547 'NU': '49.156.48.0/22',
4548 'NZ': '49.224.0.0/14',
4549 'OM': '5.36.0.0/15',
4550 'PA': '186.72.0.0/15',
4551 'PE': '186.160.0.0/14',
4552 'PF': '123.50.64.0/18',
4553 'PG': '124.240.192.0/19',
4554 'PH': '49.144.0.0/13',
4555 'PK': '39.32.0.0/11',
4556 'PL': '83.0.0.0/11',
4557 'PM': '70.36.0.0/20',
4558 'PR': '66.50.0.0/16',
4559 'PS': '188.161.0.0/16',
4560 'PT': '85.240.0.0/13',
4561 'PW': '202.124.224.0/20',
4562 'PY': '181.120.0.0/14',
4563 'QA': '37.210.0.0/15',
4564 'RE': '102.35.0.0/16',
4565 'RO': '79.112.0.0/13',
4566 'RS': '93.86.0.0/15',
4567 'RU': '5.136.0.0/13',
4568 'RW': '41.186.0.0/16',
4569 'SA': '188.48.0.0/13',
4570 'SB': '202.1.160.0/19',
4571 'SC': '154.192.0.0/11',
4572 'SD': '102.120.0.0/13',
4573 'SE': '78.64.0.0/12',
4574 'SG': '8.128.0.0/10',
4575 'SI': '188.196.0.0/14',
4576 'SK': '78.98.0.0/15',
4577 'SL': '102.143.0.0/17',
4578 'SM': '89.186.32.0/19',
4579 'SN': '41.82.0.0/15',
4580 'SO': '154.115.192.0/18',
4581 'SR': '186.179.128.0/17',
4582 'SS': '105.235.208.0/21',
4583 'ST': '197.159.160.0/19',
4584 'SV': '168.243.0.0/16',
4585 'SX': '190.102.0.0/20',
4586 'SY': '5.0.0.0/16',
4587 'SZ': '41.84.224.0/19',
4588 'TC': '65.255.48.0/20',
4589 'TD': '154.68.128.0/19',
4590 'TG': '196.168.0.0/14',
4591 'TH': '171.96.0.0/13',
4592 'TJ': '85.9.128.0/18',
4593 'TK': '27.96.24.0/21',
4594 'TL': '180.189.160.0/20',
4595 'TM': '95.85.96.0/19',
4596 'TN': '197.0.0.0/11',
4597 'TO': '175.176.144.0/21',
4598 'TR': '78.160.0.0/11',
4599 'TT': '186.44.0.0/15',
4600 'TV': '202.2.96.0/19',
4601 'TW': '120.96.0.0/11',
4602 'TZ': '156.156.0.0/14',
4603 'UA': '37.52.0.0/14',
4604 'UG': '102.80.0.0/13',
4605 'US': '6.0.0.0/8',
4606 'UY': '167.56.0.0/13',
4607 'UZ': '84.54.64.0/18',
4608 'VA': '212.77.0.0/19',
4609 'VC': '207.191.240.0/21',
4610 'VE': '186.88.0.0/13',
4611 'VG': '66.81.192.0/20',
4612 'VI': '146.226.0.0/16',
4613 'VN': '14.160.0.0/11',
4614 'VU': '202.80.32.0/20',
4615 'WF': '117.20.32.0/21',
4616 'WS': '202.4.32.0/19',
4617 'YE': '134.35.0.0/16',
4618 'YT': '41.242.116.0/22',
4619 'ZA': '41.0.0.0/11',
4620 'ZM': '102.144.0.0/13',
4621 'ZW': '102.177.192.0/18',
4622 }
4623
4624 @classmethod
4625 def random_ipv4(cls, code_or_block):
4626 if len(code_or_block) == 2:
4627 block = cls._country_ip_map.get(code_or_block.upper())
4628 if not block:
4629 return None
4630 else:
4631 block = code_or_block
4632 addr, preflen = block.split('/')
4633 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4634 addr_max = addr_min | (0xffffffff >> int(preflen))
4635 return compat_str(socket.inet_ntoa(
4636 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
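
# Illustrative usage sketch (hand-computed, untested): a two-letter argument is
# looked up in _country_ip_map; anything else is treated as a CIDR block.
#   >>> GeoUtils.random_ipv4('FR')          # a random address within 90.0.0.0/9
#   >>> GeoUtils.random_ipv4('10.0.0.0/8')  # a random address within 10.0.0.0/8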
4637
4638
4639 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4640 def __init__(self, proxies=None):
4641 # Set default handlers
4642 for type in ('http', 'https'):
4643 setattr(self, '%s_open' % type,
4644 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4645 meth(r, proxy, type))
4646 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4647
4648 def proxy_open(self, req, proxy, type):
4649 req_proxy = req.headers.get('Ytdl-request-proxy')
4650 if req_proxy is not None:
4651 proxy = req_proxy
4652 del req.headers['Ytdl-request-proxy']
4653
4654 if proxy == '__noproxy__':
4655 return None # No Proxy
4656 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4657 req.add_header('Ytdl-socks-proxy', proxy)
4658 # yt-dlp's http/https handlers wrap the socket with socks
4659 return None
4660 return compat_urllib_request.ProxyHandler.proxy_open(
4661 self, req, proxy, type)
4662
4663
4664 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4665 # released into Public Domain
4666 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4667
4668 def long_to_bytes(n, blocksize=0):
4669 """long_to_bytes(n:long, blocksize:int) : string
4670 Convert a long integer to a byte string.
4671
4672 If optional blocksize is given and greater than zero, pad the front of the
4673 byte string with binary zeros so that the length is a multiple of
4674 blocksize.
4675 """
4676 # after much testing, this algorithm was deemed to be the fastest
4677 s = b''
4678 n = int(n)
4679 while n > 0:
4680 s = compat_struct_pack('>I', n & 0xffffffff) + s
4681 n = n >> 32
4682 # strip off leading zeros
4683 for i in range(len(s)):
4684 if s[i] != b'\000'[0]:
4685 break
4686 else:
4687 # only happens when n == 0
4688 s = b'\000'
4689 i = 0
4690 s = s[i:]
4691 # add back some pad bytes. this could be done more efficiently w.r.t. the
4692 # de-padding being done above, but sigh...
4693 if blocksize > 0 and len(s) % blocksize:
4694 s = (blocksize - len(s) % blocksize) * b'\000' + s
4695 return s
4696
4697
4698 def bytes_to_long(s):
4699 """bytes_to_long(string) : long
4700 Convert a byte string to a long integer.
4701
4702 This is (essentially) the inverse of long_to_bytes().
4703 """
4704 acc = 0
4705 length = len(s)
4706 if length % 4:
4707 extra = (4 - length % 4)
4708 s = b'\000' * extra + s
4709 length = length + extra
4710 for i in range(0, length, 4):
4711 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4712 return acc
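
# Illustrative usage sketch (hand-computed, untested): the two helpers are
# inverses for big-endian byte strings.
#   >>> long_to_bytes(256)
#   b'\x01\x00'
#   >>> bytes_to_long(b'\x01\x00')
#   256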
4713
4714
4715 def ohdave_rsa_encrypt(data, exponent, modulus):
4716 '''
4717 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4718
4719 Input:
4720 data: data to encrypt, bytes-like object
4721 exponent, modulus: parameter e and N of RSA algorithm, both integer
4722 Output: hex string of encrypted data
4723
4724 Limitation: supports one block encryption only
4725 '''
4726
4727 payload = int(binascii.hexlify(data[::-1]), 16)
4728 encrypted = pow(payload, exponent, modulus)
4729 return '%x' % encrypted
4730
4731
4732 def pkcs1pad(data, length):
4733 """
4734 Padding input data with PKCS#1 scheme
4735
4736 @param {int[]} data input data
4737 @param {int} length target length
4738 @returns {int[]} padded data
4739 """
4740 if len(data) > length - 11:
4741 raise ValueError('Input data too long for PKCS#1 padding')
4742
4743 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # padding octets must be non-zero (RFC 8017 section 7.2.1)
4744 return [0, 2] + pseudo_random + [0] + data
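
# Illustrative usage sketch (hand-computed, untested): the EME-PKCS1-v1_5
# layout is [0x00, 0x02, <non-zero padding>, 0x00, <data>], so the result
# always starts with [0, 2] and is exactly `length` items long.
#   >>> padded = pkcs1pad(list(b'msg'), 16)
#   >>> padded[:2], padded[-4:], len(padded)
#   ([0, 2], [0, 109, 115, 103], 16)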
4745
4746
4747 def encode_base_n(num, n, table=None):
4748 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4749 if not table:
4750 table = FULL_TABLE[:n]
4751
4752 if n > len(table):
4753 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4754
4755 if num == 0:
4756 return table[0]
4757
4758 ret = ''
4759 while num:
4760 ret = table[num % n] + ret
4761 num = num // n
4762 return ret
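
# Illustrative usage sketch (hand-computed, untested):
#   >>> encode_base_n(123, 16)
#   '7b'
#   >>> encode_base_n(5, 2, table='01')
#   '101'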
4763
4764
4765 def decode_packed_codes(code):
4766 mobj = re.search(PACKED_CODES_RE, code)
4767 obfuscated_code, base, count, symbols = mobj.groups()
4768 base = int(base)
4769 count = int(count)
4770 symbols = symbols.split('|')
4771 symbol_table = {}
4772
4773 while count:
4774 count -= 1
4775 base_n_count = encode_base_n(count, base)
4776 symbol_table[base_n_count] = symbols[count] or base_n_count
4777
4778 return re.sub(
4779 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4780 obfuscated_code)
4781
4782
4783 def caesar(s, alphabet, shift):
4784 if shift == 0:
4785 return s
4786 l = len(alphabet)
4787 return ''.join(
4788 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4789 for c in s)
4790
4791
4792 def rot47(s):
4793 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
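
# Illustrative usage sketch (hand-computed, untested): caesar() rotates only
# characters present in the alphabet, and rot47 is its own inverse because its
# alphabet has 94 characters.
#   >>> caesar('ab-c', 'abc', 1)
#   'bc-a'
#   >>> rot47(rot47('yt-dlp'))
#   'yt-dlp'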
4794
4795
4796 def parse_m3u8_attributes(attrib):
4797 info = {}
4798 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4799 if val.startswith('"'):
4800 val = val[1:-1]
4801 info[key] = val
4802 return info
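
# Illustrative usage sketch (hand-computed, untested): quoted values keep
# embedded commas, unquoted values end at the next comma.
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401e,mp4a.40.2"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401e,mp4a.40.2'}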
4803
4804
4805 def urshift(val, n):
4806 return val >> n if val >= 0 else (val + 0x100000000) >> n
4807
4808
4809 # Based on png2str() written by @gdkchan and improved by @yokrysty
4810 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4811 def decode_png(png_data):
4812 # Reference: https://www.w3.org/TR/PNG/
4813 header = png_data[8:]
4814
4815 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4816 raise OSError('Not a valid PNG file.')
4817
4818 int_map = {1: '>B', 2: '>H', 4: '>I'}
4819 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4820
4821 chunks = []
4822
4823 while header:
4824 length = unpack_integer(header[:4])
4825 header = header[4:]
4826
4827 chunk_type = header[:4]
4828 header = header[4:]
4829
4830 chunk_data = header[:length]
4831 header = header[length:]
4832
4833 header = header[4:] # Skip CRC
4834
4835 chunks.append({
4836 'type': chunk_type,
4837 'length': length,
4838 'data': chunk_data
4839 })
4840
4841 ihdr = chunks[0]['data']
4842
4843 width = unpack_integer(ihdr[:4])
4844 height = unpack_integer(ihdr[4:8])
4845
4846 idat = b''
4847
4848 for chunk in chunks:
4849 if chunk['type'] == b'IDAT':
4850 idat += chunk['data']
4851
4852 if not idat:
4853 raise OSError('Unable to read PNG data.')
4854
4855 decompressed_data = bytearray(zlib.decompress(idat))
4856
4857 stride = width * 3
4858 pixels = []
4859
4860 def _get_pixel(idx):
4861 x = idx % stride
4862 y = idx // stride
4863 return pixels[y][x]
4864
4865 for y in range(height):
4866 basePos = y * (1 + stride)
4867 filter_type = decompressed_data[basePos]
4868
4869 current_row = []
4870
4871 pixels.append(current_row)
4872
4873 for x in range(stride):
4874 color = decompressed_data[1 + basePos + x]
4875 basex = y * stride + x
4876 left = 0
4877 up = 0
4878
4879 if x > 2:
4880 left = _get_pixel(basex - 3)
4881 if y > 0:
4882 up = _get_pixel(basex - stride)
4883
4884 if filter_type == 1: # Sub
4885 color = (color + left) & 0xff
4886 elif filter_type == 2: # Up
4887 color = (color + up) & 0xff
4888 elif filter_type == 3: # Average
4889 color = (color + ((left + up) >> 1)) & 0xff
4890 elif filter_type == 4: # Paeth
4891 a = left
4892 b = up
4893 c = 0
4894
4895 if x > 2 and y > 0:
4896 c = _get_pixel(basex - stride - 3)
4897
4898 p = a + b - c
4899
4900 pa = abs(p - a)
4901 pb = abs(p - b)
4902 pc = abs(p - c)
4903
4904 if pa <= pb and pa <= pc:
4905 color = (color + a) & 0xff
4906 elif pb <= pc:
4907 color = (color + b) & 0xff
4908 else:
4909 color = (color + c) & 0xff
4910
4911 current_row.append(color)
4912
4913 return width, height, pixels
4914
4915
4916 def write_xattr(path, key, value):
4917 # Windows: Write xattrs to NTFS Alternate Data Streams:
4918 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4919 if compat_os_name == 'nt':
4920 assert ':' not in key
4921 assert os.path.exists(path)
4922
4923 try:
4924 with open(f'{path}:{key}', 'wb') as f:
4925 f.write(value)
4926 except OSError as e:
4927 raise XAttrMetadataError(e.errno, e.strerror)
4928 return
4929
4930 # UNIX Method 1. Use the xattr/pyxattr modules
4931 from .dependencies import xattr
4932
4933 setxattr = None
4934 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
4935 # Unicode arguments are not supported in pyxattr until version 0.5.0
4936 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4937 if version_tuple(xattr.__version__) >= (0, 5, 0):
4938 setxattr = xattr.set
4939 elif xattr:
4940 setxattr = xattr.setxattr
4941
4942 if setxattr:
4943 try:
4944 setxattr(path, key, value)
4945 except OSError as e:
4946 raise XAttrMetadataError(e.errno, e.strerror)
4947 return
4948
4949 # UNIX Method 2. Use setfattr/xattr executables
4950 exe = ('setfattr' if check_executable('setfattr', ['--version'])
4951 else 'xattr' if check_executable('xattr', ['-h']) else None)
4952 if not exe:
4953 raise XAttrUnavailableError(
4954 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
4955 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4956
4957 value = value.decode()
4958 try:
4959 _, stderr, returncode = Popen.run(
4960 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
4961 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4962 except OSError as e:
4963 raise XAttrMetadataError(e.errno, e.strerror)
4964 if returncode:
4965 raise XAttrMetadataError(returncode, stderr)
4966
4967
4968 def random_birthday(year_field, month_field, day_field):
4969 start_date = datetime.date(1950, 1, 1)
4970 end_date = datetime.date(1995, 12, 31)
4971 offset = random.randint(0, (end_date - start_date).days)
4972 random_date = start_date + datetime.timedelta(offset)
4973 return {
4974 year_field: str(random_date.year),
4975 month_field: str(random_date.month),
4976 day_field: str(random_date.day),
4977 }
4978
4979
4980 # Templates for internet shortcut files, which are plain text files.
4981 DOT_URL_LINK_TEMPLATE = '''\
4982 [InternetShortcut]
4983 URL=%(url)s
4984 '''
4985
4986 DOT_WEBLOC_LINK_TEMPLATE = '''\
4987 <?xml version="1.0" encoding="UTF-8"?>
4988 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4989 <plist version="1.0">
4990 <dict>
4991 \t<key>URL</key>
4992 \t<string>%(url)s</string>
4993 </dict>
4994 </plist>
4995 '''
4996
4997 DOT_DESKTOP_LINK_TEMPLATE = '''\
4998 [Desktop Entry]
4999 Encoding=UTF-8
5000 Name=%(filename)s
5001 Type=Link
5002 URL=%(url)s
5003 Icon=text-html
5004 '''
5005
5006 LINK_TEMPLATES = {
5007 'url': DOT_URL_LINK_TEMPLATE,
5008 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5009 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5010 }
5011
5012
5013 def iri_to_uri(iri):
5014 """
5015 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5016
5017 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5018 """
5019
5020 iri_parts = compat_urllib_parse_urlparse(iri)
5021
5022 if '[' in iri_parts.netloc:
5023 raise ValueError('IPv6 URIs are not yet supported.')
5024 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5025
5026 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5027
5028 net_location = ''
5029 if iri_parts.username:
5030 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5031 if iri_parts.password is not None:
5032 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5033 net_location += '@'
5034
5035 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5036 # The 'idna' encoding produces ASCII text.
5037 if iri_parts.port is not None and iri_parts.port != 80:
5038 net_location += ':' + str(iri_parts.port)
5039
5040 return urllib.parse.urlunparse(
5041 (iri_parts.scheme,
5042 net_location,
5043
5044 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5045
5046 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5047 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5048
5049 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5050 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5051
5052 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5053
5054 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
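
# Illustrative usage sketch (hand-computed, untested): Unicode hostnames are
# punycoded and non-ASCII path characters percent-encoded as UTF-8, while
# pre-existing %XX escapes are left untouched.
#   >>> iri_to_uri('https://münchen.de/straße')
#   'https://xn--mnchen-3ya.de/stra%C3%9Fe'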
5055
5056
5057 def to_high_limit_path(path):
5058 if sys.platform in ['win32', 'cygwin']:
5059 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5060 return '\\\\?\\' + os.path.abspath(path)
5061
5062 return path
5063
5064
5065 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=None):
5066 val = traverse_obj(obj, *variadic(field))
5067 if (not val and val != 0) if ignore is NO_DEFAULT else val in ignore:
5068 return default
5069 return template % (func(val) if func else val)
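
# Illustrative usage sketch (hand-computed, untested): falsy (but non-zero)
# values fall back to `default` unless an explicit `ignore` container is given.
#   >>> format_field({'width': 1280}, 'width', '%dpx')
#   '1280px'
#   >>> format_field({}, 'width', '%dpx', default='unknown')
#   'unknown'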
5070
5071
5072 def clean_podcast_url(url):
5073 return re.sub(r'''(?x)
5074 (?:
5075 (?:
5076 chtbl\.com/track|
5077 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5078 play\.podtrac\.com
5079 )/[^/]+|
5080 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5081 flex\.acast\.com|
5082 pd(?:
5083 cn\.co| # https://podcorn.com/analytics-prefix/
5084 st\.fm # https://podsights.com/docs/
5085 )/e
5086 )/''', '', url)
5087
5088
5089 _HEX_TABLE = '0123456789abcdef'
5090
5091
5092 def random_uuidv4():
5093 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5094
5095
5096 def make_dir(path, to_screen=None):
5097 try:
5098 dn = os.path.dirname(path)
5099 if dn and not os.path.exists(dn):
5100 os.makedirs(dn)
5101 return True
5102 except OSError as err:
5103 if callable(to_screen):
5104 to_screen('unable to create directory ' + error_to_compat_str(err))
5105 return False
5106
5107
5108 def get_executable_path():
5109 from .update import _get_variant_and_executable_path
5110
5111 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5112
5113
5114 def load_plugins(name, suffix, namespace):
5115 classes = {}
5116 with contextlib.suppress(FileNotFoundError):
5117 plugins_spec = importlib.util.spec_from_file_location(
5118 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5119 plugins = importlib.util.module_from_spec(plugins_spec)
5120 sys.modules[plugins_spec.name] = plugins
5121 plugins_spec.loader.exec_module(plugins)
5122 for name in dir(plugins):
5123 if name in namespace:
5124 continue
5125 if not name.endswith(suffix):
5126 continue
5127 klass = getattr(plugins, name)
5128 classes[name] = namespace[name] = klass
5129 return classes
5130
5131
5132 def traverse_obj(
5133 obj, *path_list, default=None, expected_type=None, get_all=True,
5134 casesense=True, is_user_input=False, traverse_string=False):
5135 ''' Traverse nested list/dict/tuple
5136 @param path_list A list of paths which are checked one by one.
5137 Each path is a list of keys where each key is a:
5138 - None: Do nothing
5139 - string: A dictionary key
5140 - int: An index into a list
5141 - tuple: A list of keys all of which will be traversed
5142 - Ellipsis: Fetch all values in the object
5143 - Function: Takes the key and value as arguments
5144 and returns whether the key matches or not
5145 @param default Default value to return
5146 @param expected_type Only accept final value of this type (Can also be any callable)
5147 @param get_all Return all the values obtained from a path or only the first one
5148 @param casesense Whether to consider dictionary keys as case sensitive
5149 @param is_user_input Whether the keys are generated from user input. If True,
5150 strings are converted to int/slice if necessary
5151 @param traverse_string Whether to traverse inside strings. If True, any
5152 non-compatible object will also be converted into a string
5153 # TODO: Write tests
5154 '''
5155 if not casesense:
5156 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
5157 path_list = (map(_lower, variadic(path)) for path in path_list)
5158
5159 def _traverse_obj(obj, path, _current_depth=0):
5160 nonlocal depth
5161 path = tuple(variadic(path))
5162 for i, key in enumerate(path):
5163 if None in (key, obj):
5164 return obj
5165 if isinstance(key, (list, tuple)):
5166 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5167 key = ...
5168 if key is ...:
5169 obj = (obj.values() if isinstance(obj, dict)
5170 else obj if isinstance(obj, (list, tuple, LazyList))
5171 else str(obj) if traverse_string else [])
5172 _current_depth += 1
5173 depth = max(depth, _current_depth)
5174 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5175 elif callable(key):
5176 if isinstance(obj, (list, tuple, LazyList)):
5177 obj = enumerate(obj)
5178 elif isinstance(obj, dict):
5179 obj = obj.items()
5180 else:
5181 if not traverse_string:
5182 return None
5183 obj = str(obj)
5184 _current_depth += 1
5185 depth = max(depth, _current_depth)
5186 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5187 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5188 obj = (obj.get(key) if casesense or (key in obj)
5189 else next((v for k, v in obj.items() if _lower(k) == key), None))
5190 else:
5191 if is_user_input:
5192 key = (int_or_none(key) if ':' not in key
5193 else slice(*map(int_or_none, key.split(':'))))
5194 if key == slice(None):
5195 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5196 if not isinstance(key, (int, slice)):
5197 return None
5198 if not isinstance(obj, (list, tuple, LazyList)):
5199 if not traverse_string:
5200 return None
5201 obj = str(obj)
5202 try:
5203 obj = obj[key]
5204 except IndexError:
5205 return None
5206 return obj
5207
5208 if isinstance(expected_type, type):
5209 type_test = lambda val: val if isinstance(val, expected_type) else None
5210 elif expected_type is not None:
5211 type_test = expected_type
5212 else:
5213 type_test = lambda val: val
5214
5215 for path in path_list:
5216 depth = 0
5217 val = _traverse_obj(obj, path)
5218 if val is not None:
5219 if depth:
5220 for _ in range(depth - 1):
5221 val = itertools.chain.from_iterable(v for v in val if v is not None)
5222 val = [v for v in map(type_test, val) if v is not None]
5223 if val:
5224 return val if get_all else val[0]
5225 else:
5226 val = type_test(val)
5227 if val is not None:
5228 return val
5229 return default
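
# Illustrative usage sketch (hand-computed, untested): `...` fans out over all
# values at that level, while ints/strings index normally.
#   >>> traverse_obj({'a': [{'b': 1}, {'b': 2}]}, ('a', 0, 'b'))
#   1
#   >>> traverse_obj({'a': [{'b': 1}, {'b': 2}]}, ('a', ..., 'b'))
#   [1, 2]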
5230
5231
5232 def traverse_dict(dictn, keys, casesense=True):
5233 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5234 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5235 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5236
5237
5238 def get_first(obj, keys, **kwargs):
5239 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5240
5241
5242 def variadic(x, allowed_types=(str, bytes, dict)):
5243 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5244
5245
5246 def decode_base(value, digits):
5247 # Convert the given base-x string to an integer
5248 table = {char: index for index, char in enumerate(digits)}
5249 result = 0
5250 base = len(digits)
5251 for chr in value:
5252 result *= base
5253 result += table[chr]
5254 return result
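
# Illustrative usage sketch (hand-computed, untested): inverse of
# encode_base_n() for the same digit table.
#   >>> decode_base('7b', '0123456789abcdef')
#   123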
5255
5256
5257 def time_seconds(**kwargs):
5258 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5259 return t.timestamp()
5260
5261
5262 # create a JSON Web Signature (JWS) with the HS256 algorithm
5263 # the resulting format is JWS Compact Serialization
5264 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5265 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5266 def jwt_encode_hs256(payload_data, key, headers={}):
5267 header_data = {
5268 'alg': 'HS256',
5269 'typ': 'JWT',
5270 }
5271 if headers:
5272 header_data.update(headers)
5273 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5274 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5275 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5276 signature_b64 = base64.b64encode(h.digest())
5277 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5278 return token
5279
5280
5281 # This can be extended in the future to verify the signature and parse the header, returning the algorithm used if it's not HS256
5282 def jwt_decode_hs256(jwt):
5283 header_b64, payload_b64, signature_b64 = jwt.split('.')
5284 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5285 return payload_data
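
# Illustrative usage sketch (hand-computed, untested): jwt_encode_hs256()
# returns bytes, so decode it to str before splitting the compact form.
#   >>> token = jwt_encode_hs256({'uid': 1}, 'secret')
#   >>> jwt_decode_hs256(token.decode())
#   {'uid': 1}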
5286
5287
5288 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5289
5290
5291 @functools.cache
5292 def supports_terminal_sequences(stream):
5293 if compat_os_name == 'nt':
5294 if not WINDOWS_VT_MODE:
5295 return False
5296 elif not os.getenv('TERM'):
5297 return False
5298 try:
5299 return stream.isatty()
5300 except BaseException:
5301 return False
5302
5303
5304 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5305 if get_windows_version() < (10, 0, 10586):
5306 return
5307 global WINDOWS_VT_MODE
5308 try:
5309 Popen.run('', shell=True)
5310 except Exception:
5311 return
5312
5313 WINDOWS_VT_MODE = True
5314 supports_terminal_sequences.cache_clear()
5315
5316
5317 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5318
5319
5320 def remove_terminal_sequences(string):
5321 return _terminal_sequences_re.sub('', string)
5322
5323
5324 def number_of_digits(number):
5325 return len('%d' % number)
5326
5327
5328 def join_nonempty(*values, delim='-', from_dict=None):
5329 if from_dict is not None:
5330 values = map(from_dict.get, values)
5331 return delim.join(map(str, filter(None, values)))
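
# Illustrative usage sketch (hand-computed, untested): falsy values (None, '',
# 0, ...) are dropped before joining.
#   >>> join_nonempty('en', None, 'US')
#   'en-US'
#   >>> join_nonempty('a', 'b', delim='.')
#   'a.b'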
5332
5333
5334 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5335 """
5336 Find the largest format dimensions in terms of video width and, for each thumbnail:
5337 * Modify the URL: Match the width with the provided regex and replace with the former width
5338 * Update dimensions
5339
5340 This function is useful with video services that scale the provided thumbnails on demand
5341 """
5342 _keys = ('width', 'height')
5343 max_dimensions = max(
5344 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5345 default=(0, 0))
5346 if not max_dimensions[0]:
5347 return thumbnails
5348 return [
5349 merge_dicts(
5350 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5351 dict(zip(_keys, max_dimensions)), thumbnail)
5352 for thumbnail in thumbnails
5353 ]
5354
5355
5356 def parse_http_range(range):
5357 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5358 if not range:
5359 return None, None, None
5360 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5361 if not crg:
5362 return None, None, None
5363 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
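
# Illustrative usage sketch (hand-computed, untested): works for both request
# ("Range") and response ("Content-Range") header values.
#   >>> parse_http_range('bytes=0-499')
#   (0, 499, None)
#   >>> parse_http_range('bytes 0-499/1234')
#   (0, 499, 1234)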
5364
5365
5366 def read_stdin(what):
5367 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5368 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5369 return sys.stdin
5370
5371
5372 class Config:
5373 own_args = None
5374 parsed_args = None
5375 filename = None
5376 __initialized = False
5377
5378 def __init__(self, parser, label=None):
5379 self.parser, self.label = parser, label
5380 self._loaded_paths, self.configs = set(), []
5381
5382 def init(self, args=None, filename=None):
5383 assert not self.__initialized
5384 directory = ''
5385 if filename:
5386 location = os.path.realpath(filename)
5387 directory = os.path.dirname(location)
5388 if location in self._loaded_paths:
5389 return False
5390 self._loaded_paths.add(location)
5391
5392 self.own_args, self.__initialized = args, True
5393 opts, _ = self.parser.parse_known_args(args)
5394 self.parsed_args, self.filename = args, filename
5395
5396 for location in opts.config_locations or []:
5397 if location == '-':
5398 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5399 continue
5400 location = os.path.join(directory, expand_path(location))
5401 if os.path.isdir(location):
5402 location = os.path.join(location, 'yt-dlp.conf')
5403 if not os.path.exists(location):
5404 self.parser.error(f'config location {location} does not exist')
5405 self.append_config(self.read_file(location), location)
5406 return True
5407
5408 def __str__(self):
5409 label = join_nonempty(
5410 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5411 delim=' ')
5412 return join_nonempty(
5413 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5414 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5415 delim='\n')
5416
5417 @staticmethod
5418 def read_file(filename, default=[]):
5419 try:
5420 optionf = open(filename)
5421 except OSError:
5422 return default # silently skip if file is not present
5423 try:
5424 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5425 contents = optionf.read()
5426 res = shlex.split(contents, comments=True)
5427 except Exception as err:
5428 raise ValueError(f'Unable to parse "{filename}": {err}')
5429 finally:
5430 optionf.close()
5431 return res
5432
5433 @staticmethod
5434 def hide_login_info(opts):
5435 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5436 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5437
5438 def _scrub_eq(o):
5439 m = eqre.match(o)
5440 if m:
5441 return m.group('key') + '=PRIVATE'
5442 else:
5443 return o
5444
5445 opts = list(map(_scrub_eq, opts))
5446 for idx, opt in enumerate(opts):
5447 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5448 opts[idx + 1] = 'PRIVATE'
5449 return opts
5450
5451 def append_config(self, *args, label=None):
5452 config = type(self)(self.parser, label)
5453 config._loaded_paths = self._loaded_paths
5454 if config.init(*args):
5455 self.configs.append(config)
5456
5457 @property
5458 def all_args(self):
5459 for config in reversed(self.configs):
5460 yield from config.all_args
5461 yield from self.parsed_args or []
5462
5463 def parse_known_args(self, **kwargs):
5464 return self.parser.parse_known_args(self.all_args, **kwargs)
5465
5466 def parse_args(self):
5467 return self.parser.parse_args(self.all_args)
5468
5469
5470 class WebSocketsWrapper:
5471 """Wraps websockets module to use in non-async scopes"""
5472 pool = None
5473
5474 def __init__(self, url, headers=None, connect=True):
5475 self.loop = asyncio.new_event_loop()
5476 # XXX: "loop" is deprecated
5477 self.conn = websockets.connect(
5478 url, extra_headers=headers, ping_interval=None,
5479 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5480 if connect:
5481 self.__enter__()
5482 atexit.register(self.__exit__, None, None, None)
5483
5484 def __enter__(self):
5485 if not self.pool:
5486 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5487 return self
5488
5489 def send(self, *args):
5490 self.run_with_loop(self.pool.send(*args), self.loop)
5491
5492 def recv(self, *args):
5493 return self.run_with_loop(self.pool.recv(*args), self.loop)
5494
5495 def __exit__(self, type, value, traceback):
5496 try:
5497 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5498 finally:
5499 self._cancel_all_tasks(self.loop)
5500 self.loop.close()
5501
5502 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5503 # for contributors: if a new library that uses asyncio needs to run in non-async code, move these functions out of this class
5504 @staticmethod
5505 def run_with_loop(main, loop):
5506 if not asyncio.iscoroutine(main):
5507 raise ValueError(f'a coroutine was expected, got {main!r}')
5508
5509 try:
5510 return loop.run_until_complete(main)
5511 finally:
5512 loop.run_until_complete(loop.shutdown_asyncgens())
5513 if hasattr(loop, 'shutdown_default_executor'):
5514 loop.run_until_complete(loop.shutdown_default_executor())
5515
5516 @staticmethod
5517 def _cancel_all_tasks(loop):
5518 to_cancel = asyncio.all_tasks(loop)
5519
5520 if not to_cancel:
5521 return
5522
5523 for task in to_cancel:
5524 task.cancel()
5525
5526 # XXX: "loop" is removed in python 3.10+
5527 loop.run_until_complete(
5528 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5529
5530 for task in to_cancel:
5531 if task.cancelled():
5532 continue
5533 if task.exception() is not None:
5534 loop.call_exception_handler({
5535 'message': 'unhandled exception during asyncio.run() shutdown',
5536 'exception': task.exception(),
5537 'task': task,
5538 })
5539
5540
5541 def merge_headers(*dicts):
5542 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5543 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
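
# Illustrative usage sketch (hand-computed, untested): keys are title-cased, so
# differently-cased duplicates collapse, with later dicts winning.
#   >>> merge_headers({'user-agent': 'UA1', 'x-foo': 'a'}, {'User-Agent': 'UA2'})
#   {'User-Agent': 'UA2', 'X-Foo': 'a'}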
5544
5545
5546 class classproperty:
5547 """classmethod(property(func)) that works in py < 3.9"""
5548
5549 def __init__(self, func):
5550 functools.update_wrapper(self, func)
5551 self.func = func
5552
5553 def __get__(self, _, cls):
5554 return self.func(cls)
5555
5556
5557 class Namespace(types.SimpleNamespace):
5558 """Immutable namespace"""
5559
5560 def __iter__(self):
5561 return iter(self.__dict__.values())
5562
5563 @property
5564 def items_(self):
5565 return self.__dict__.items()
5566
5567
5568 # Deprecated
5569 has_certifi = bool(certifi)
5570 has_websockets = bool(websockets)