1 #!/usr/bin/env python3
2 import atexit
3 import base64
4 import binascii
5 import calendar
6 import codecs
7 import collections
8 import contextlib
9 import ctypes
10 import datetime
11 import email.header
12 import email.utils
13 import errno
14 import gzip
15 import hashlib
16 import hmac
17 import importlib.util
18 import io
19 import itertools
20 import json
21 import locale
22 import math
23 import mimetypes
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import shlex
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import types
38 import urllib.parse
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import asyncio, functools # isort: split
43 from .compat import (
44 compat_chr,
45 compat_cookiejar,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_HTMLParseError,
51 compat_HTMLParser,
52 compat_http_client,
53 compat_HTTPError,
54 compat_os_name,
55 compat_parse_qs,
56 compat_shlex_quote,
57 compat_str,
58 compat_struct_pack,
59 compat_struct_unpack,
60 compat_urllib_error,
61 compat_urllib_parse_unquote_plus,
62 compat_urllib_parse_urlencode,
63 compat_urllib_parse_urlparse,
64 compat_urllib_request,
65 compat_urlparse,
66 )
67 from .dependencies import brotli, certifi, websockets
68 from .socks import ProxyType, sockssocket
69
70
71 def register_socks_protocols():
72 # "Register" SOCKS protocols
73 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
74 # URLs with protocols not in urlparse.uses_netloc are not handled correctly
75 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
76 if scheme not in compat_urlparse.uses_netloc:
77 compat_urlparse.uses_netloc.append(scheme)
78
79
80 # This is not clearly defined otherwise
81 compiled_regex_type = type(re.compile(''))
82
83
84 def random_user_agent():
85 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
86 _CHROME_VERSIONS = (
87 '90.0.4430.212',
88 '90.0.4430.24',
89 '90.0.4430.70',
90 '90.0.4430.72',
91 '90.0.4430.85',
92 '90.0.4430.93',
93 '91.0.4472.101',
94 '91.0.4472.106',
95 '91.0.4472.114',
96 '91.0.4472.124',
97 '91.0.4472.164',
98 '91.0.4472.19',
99 '91.0.4472.77',
100 '92.0.4515.107',
101 '92.0.4515.115',
102 '92.0.4515.131',
103 '92.0.4515.159',
104 '92.0.4515.43',
105 '93.0.4556.0',
106 '93.0.4577.15',
107 '93.0.4577.63',
108 '93.0.4577.82',
109 '94.0.4606.41',
110 '94.0.4606.54',
111 '94.0.4606.61',
112 '94.0.4606.71',
113 '94.0.4606.81',
114 '94.0.4606.85',
115 '95.0.4638.17',
116 '95.0.4638.50',
117 '95.0.4638.54',
118 '95.0.4638.69',
119 '95.0.4638.74',
120 '96.0.4664.18',
121 '96.0.4664.45',
122 '96.0.4664.55',
123 '96.0.4664.93',
124 '97.0.4692.20',
125 )
126 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
127
128
129 SUPPORTED_ENCODINGS = [
130 'gzip', 'deflate'
131 ]
132 if brotli:
133 SUPPORTED_ENCODINGS.append('br')
134
135 std_headers = {
136 'User-Agent': random_user_agent(),
137 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
138 'Accept-Language': 'en-us,en;q=0.5',
139 'Sec-Fetch-Mode': 'navigate',
140 }
141
142
143 USER_AGENTS = {
144 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
145 }
146
147
148 NO_DEFAULT = object()
149
150 ENGLISH_MONTH_NAMES = [
151 'January', 'February', 'March', 'April', 'May', 'June',
152 'July', 'August', 'September', 'October', 'November', 'December']
153
154 MONTH_NAMES = {
155 'en': ENGLISH_MONTH_NAMES,
156 'fr': [
157 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
158 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
159 }
160
161 KNOWN_EXTENSIONS = (
162 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
163 'flv', 'f4v', 'f4a', 'f4b',
164 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
165 'mkv', 'mka', 'mk3d',
166 'avi', 'divx',
167 'mov',
168 'asf', 'wmv', 'wma',
169 '3gp', '3g2',
170 'mp3',
171 'flac',
172 'ape',
173 'wav',
174 'f4f', 'f4m', 'm3u8', 'smil')
175
176 # needed for sanitizing filenames in restricted mode
177 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
178 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
179 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
180
181 DATE_FORMATS = (
182 '%d %B %Y',
183 '%d %b %Y',
184 '%B %d %Y',
185 '%B %dst %Y',
186 '%B %dnd %Y',
187 '%B %drd %Y',
188 '%B %dth %Y',
189 '%b %d %Y',
190 '%b %dst %Y',
191 '%b %dnd %Y',
192 '%b %drd %Y',
193 '%b %dth %Y',
194 '%b %dst %Y %I:%M',
195 '%b %dnd %Y %I:%M',
196 '%b %drd %Y %I:%M',
197 '%b %dth %Y %I:%M',
198 '%Y %m %d',
199 '%Y-%m-%d',
200 '%Y.%m.%d.',
201 '%Y/%m/%d',
202 '%Y/%m/%d %H:%M',
203 '%Y/%m/%d %H:%M:%S',
204 '%Y%m%d%H%M',
205 '%Y%m%d%H%M%S',
206 '%Y%m%d',
207 '%Y-%m-%d %H:%M',
208 '%Y-%m-%d %H:%M:%S',
209 '%Y-%m-%d %H:%M:%S.%f',
210 '%Y-%m-%d %H:%M:%S:%f',
211 '%d.%m.%Y %H:%M',
212 '%d.%m.%Y %H.%M',
213 '%Y-%m-%dT%H:%M:%SZ',
214 '%Y-%m-%dT%H:%M:%S.%fZ',
215 '%Y-%m-%dT%H:%M:%S.%f0Z',
216 '%Y-%m-%dT%H:%M:%S',
217 '%Y-%m-%dT%H:%M:%S.%f',
218 '%Y-%m-%dT%H:%M',
219 '%b %d %Y at %H:%M',
220 '%b %d %Y at %H:%M:%S',
221 '%B %d %Y at %H:%M',
222 '%B %d %Y at %H:%M:%S',
223 '%H:%M %d-%b-%Y',
224 )
225
226 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
227 DATE_FORMATS_DAY_FIRST.extend([
228 '%d-%m-%Y',
229 '%d.%m.%Y',
230 '%d.%m.%y',
231 '%d/%m/%Y',
232 '%d/%m/%y',
233 '%d/%m/%Y %H:%M:%S',
234 ])
235
236 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
237 DATE_FORMATS_MONTH_FIRST.extend([
238 '%m-%d-%Y',
239 '%m.%d.%Y',
240 '%m/%d/%Y',
241 '%m/%d/%y',
242 '%m/%d/%Y %H:%M:%S',
243 ])
244
245 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
246 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
247
248 NUMBER_RE = r'\d+(?:\.\d+)?'
249
250
251 @functools.cache
252 def preferredencoding():
253 """Get preferred encoding.
254
255 Returns the best encoding scheme for the system, based on
256 locale.getpreferredencoding() and some further tweaks.
257 """
258 try:
259 pref = locale.getpreferredencoding()
260 'TEST'.encode(pref)
261 except Exception:
262 pref = 'UTF-8'
263
264 return pref
265
266
267 def write_json_file(obj, fn):
268 """ Encode obj as JSON and write it to fn, atomically if possible """
269
270 tf = tempfile.NamedTemporaryFile(
271 prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
272 suffix='.tmp', delete=False, mode='w', encoding='utf-8')
273
274 try:
275 with tf:
276 json.dump(obj, tf, ensure_ascii=False)
277 if sys.platform == 'win32':
278 # Need to remove existing file on Windows, else os.rename raises
279 # WindowsError or FileExistsError.
280 with contextlib.suppress(OSError):
281 os.unlink(fn)
282 with contextlib.suppress(OSError):
283 mask = os.umask(0)
284 os.umask(mask)
285 os.chmod(tf.name, 0o666 & ~mask)
286 os.rename(tf.name, fn)
287 except Exception:
288 with contextlib.suppress(OSError):
289 os.remove(tf.name)
290 raise
291
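# Illustrative usage of the atomic-write helper above ('info.json' is a
# hypothetical path): the data is first written to a same-directory `.tmp`
# file and then os.rename()d over the target, so readers never observe a
# half-written file.
#
#   >>> write_json_file({'id': 'abc', 'title': 'test'}, 'info.json')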
292
293 def find_xpath_attr(node, xpath, key, val=None):
294 """ Find the xpath xpath[@key=val] """
295 assert re.match(r'^[a-zA-Z_-]+$', key)
296 expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
297 return node.find(expr)
298
299 # The ElementTree .find() calls in this module do not pass a `namespaces`
300 # mapping, so namespace prefixes must be expanded to `{uri}tag` form manually
301
302
303 def xpath_with_ns(path, ns_map):
304 components = [c.split(':') for c in path.split('/')]
305 replaced = []
306 for c in components:
307 if len(c) == 1:
308 replaced.append(c[0])
309 else:
310 ns, tag = c
311 replaced.append('{%s}%s' % (ns_map[ns], tag))
312 return '/'.join(replaced)
313
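# Example: expanding namespace prefixes into ElementTree's `{uri}tag` form
# (the MRSS namespace URI is just an illustration).
#
#   >>> xpath_with_ns('media:group/media:title', {'media': 'http://search.yahoo.com/mrss/'})
#   '{http://search.yahoo.com/mrss/}group/{http://search.yahoo.com/mrss/}title'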
314
315 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
316 def _find_xpath(xpath):
317 return node.find(xpath)
318
319 if isinstance(xpath, str):
320 n = _find_xpath(xpath)
321 else:
322 for xp in xpath:
323 n = _find_xpath(xp)
324 if n is not None:
325 break
326
327 if n is None:
328 if default is not NO_DEFAULT:
329 return default
330 elif fatal:
331 name = xpath if name is None else name
332 raise ExtractorError('Could not find XML element %s' % name)
333 else:
334 return None
335 return n
336
337
338 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
339 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
340 if n is None or n == default:
341 return n
342 if n.text is None:
343 if default is not NO_DEFAULT:
344 return default
345 elif fatal:
346 name = xpath if name is None else name
347 raise ExtractorError('Could not find XML element\'s text %s' % name)
348 else:
349 return None
350 return n.text
351
352
353 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
354 n = find_xpath_attr(node, xpath, key)
355 if n is None:
356 if default is not NO_DEFAULT:
357 return default
358 elif fatal:
359 name = f'{xpath}[@{key}]' if name is None else name
360 raise ExtractorError('Could not find XML attribute %s' % name)
361 else:
362 return None
363 return n.attrib[key]
364
365
366 def get_element_by_id(id, html, **kwargs):
367 """Return the content of the tag with the specified ID in the passed HTML document"""
368 return get_element_by_attribute('id', id, html, **kwargs)
369
370
371 def get_element_html_by_id(id, html, **kwargs):
372 """Return the html of the tag with the specified ID in the passed HTML document"""
373 return get_element_html_by_attribute('id', id, html, **kwargs)
374
375
376 def get_element_by_class(class_name, html):
377 """Return the content of the first tag with the specified class in the passed HTML document"""
378 retval = get_elements_by_class(class_name, html)
379 return retval[0] if retval else None
380
381
382 def get_element_html_by_class(class_name, html):
383 """Return the html of the first tag with the specified class in the passed HTML document"""
384 retval = get_elements_html_by_class(class_name, html)
385 return retval[0] if retval else None
386
387
388 def get_element_by_attribute(attribute, value, html, **kwargs):
389 retval = get_elements_by_attribute(attribute, value, html, **kwargs)
390 return retval[0] if retval else None
391
392
393 def get_element_html_by_attribute(attribute, value, html, **kwargs):
394 retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
395 return retval[0] if retval else None
396
397
398 def get_elements_by_class(class_name, html, **kwargs):
399 """Return the content of all tags with the specified class in the passed HTML document as a list"""
400 return get_elements_by_attribute(
401 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
402 html, escape_value=False)
403
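# Example: the class regex above matches whole class names only, so 'title'
# does not match an element whose class is 'subtitle'.
#
#   >>> get_elements_by_class('title', '<p class="title">a</p><p class="subtitle">b</p>')
#   ['a']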
404
405 def get_elements_html_by_class(class_name, html):
406 """Return the html of all tags with the specified class in the passed HTML document as a list"""
407 return get_elements_html_by_attribute(
408 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
409 html, escape_value=False)
410
411
412 def get_elements_by_attribute(*args, **kwargs):
413 """Return the content of the tag with the specified attribute in the passed HTML document"""
414 return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
415
416
417 def get_elements_html_by_attribute(*args, **kwargs):
418 """Return the html of the tag with the specified attribute in the passed HTML document"""
419 return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
420
421
422 def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
423 """
424 Return the text (content) and the html (whole) of all tags with the
425 specified attribute in the passed HTML document
426 """
427
428 quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
429
430 value = re.escape(value) if escape_value else value
431
432 partial_element_re = rf'''(?x)
433 <(?P<tag>[a-zA-Z0-9:._-]+)
434 (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
435 \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
436 '''
437
438 for m in re.finditer(partial_element_re, html):
439 content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
440
441 yield (
442 unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
443 whole
444 )
445
446
447 class HTMLBreakOnClosingTagParser(compat_HTMLParser):
448 """
449 HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
450 closing tag for the first opening tag it has encountered, and can be used
451 as a context manager
452 """
453
454 class HTMLBreakOnClosingTagException(Exception):
455 pass
456
457 def __init__(self):
458 self.tagstack = collections.deque()
459 compat_HTMLParser.__init__(self)
460
461 def __enter__(self):
462 return self
463
464 def __exit__(self, *_):
465 self.close()
466
467 def close(self):
468 # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
469 # so data remains buffered; we no longer have any interest in it, thus
470 # override this method to discard it
471 pass
472
473 def handle_starttag(self, tag, _):
474 self.tagstack.append(tag)
475
476 def handle_endtag(self, tag):
477 if not self.tagstack:
478 raise compat_HTMLParseError('no tags in the stack')
479 while self.tagstack:
480 inner_tag = self.tagstack.pop()
481 if inner_tag == tag:
482 break
483 else:
484 raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
485 if not self.tagstack:
486 raise self.HTMLBreakOnClosingTagException()
487
488
489 def get_element_text_and_html_by_tag(tag, html):
490 """
491 For the first element with the specified tag in the passed HTML document
492 return its content (text) and the whole element (html)
493 """
494 def find_or_raise(haystack, needle, exc):
495 try:
496 return haystack.index(needle)
497 except ValueError:
498 raise exc
499 closing_tag = f'</{tag}>'
500 whole_start = find_or_raise(
501 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
502 content_start = find_or_raise(
503 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
504 content_start += whole_start + 1
505 with HTMLBreakOnClosingTagParser() as parser:
506 parser.feed(html[whole_start:content_start])
507 if not parser.tagstack or parser.tagstack[0] != tag:
508 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
509 offset = content_start
510 while offset < len(html):
511 next_closing_tag_start = find_or_raise(
512 html[offset:], closing_tag,
513 compat_HTMLParseError(f'closing {tag} tag not found'))
514 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
515 try:
516 parser.feed(html[offset:offset + next_closing_tag_end])
517 offset += next_closing_tag_end
518 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
519 return html[content_start:offset + next_closing_tag_start], \
520 html[whole_start:offset + next_closing_tag_end]
521 raise compat_HTMLParseError('unexpected end of html')
522
523
524 class HTMLAttributeParser(compat_HTMLParser):
525 """Trivial HTML parser to gather the attributes for a single element"""
526
527 def __init__(self):
528 self.attrs = {}
529 compat_HTMLParser.__init__(self)
530
531 def handle_starttag(self, tag, attrs):
532 self.attrs = dict(attrs)
533
534
535 class HTMLListAttrsParser(compat_HTMLParser):
536 """HTML parser to gather the attributes for the elements of a list"""
537
538 def __init__(self):
539 compat_HTMLParser.__init__(self)
540 self.items = []
541 self._level = 0
542
543 def handle_starttag(self, tag, attrs):
544 if tag == 'li' and self._level == 0:
545 self.items.append(dict(attrs))
546 self._level += 1
547
548 def handle_endtag(self, tag):
549 self._level -= 1
550
551
552 def extract_attributes(html_element):
553 """Given a string for an HTML element such as
554 <el
555 a="foo" B="bar" c="&98;az" d=boz
556 empty= noval entity="&amp;"
557 sq='"' dq="'"
558 >
559 Decode and return a dictionary of attributes.
560 {
561 'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
562 'empty': '', 'noval': None, 'entity': '&',
563 'sq': '"', 'dq': '\''
564 }.
565 """
566 parser = HTMLAttributeParser()
567 with contextlib.suppress(compat_HTMLParseError):
568 parser.feed(html_element)
569 parser.close()
570 return parser.attrs
571
572
573 def parse_list(webpage):
574 """Given a string for an series of HTML <li> elements,
575 return a dictionary of their attributes"""
576 parser = HTMLListAttrsParser()
577 parser.feed(webpage)
578 parser.close()
579 return parser.items
580
581
582 def clean_html(html):
583 """Clean an HTML snippet into a readable string"""
584
585 if html is None: # Convenience for sanitizing descriptions etc.
586 return html
587
588 html = re.sub(r'\s+', ' ', html)
589 html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
590 html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
591 # Strip html tags
592 html = re.sub('<.*?>', '', html)
593 # Replace html entities
594 html = unescapeHTML(html)
595 return html.strip()
596
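# Example: <br> becomes a newline, tags are stripped and entities are decoded.
#
#   >>> clean_html('<p>foo<br/>bar &amp; baz</p>')
#   'foo\nbar & baz'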
597
598 class LenientJSONDecoder(json.JSONDecoder):
599 def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
600 self.transform_source, self.ignore_extra = transform_source, ignore_extra
601 super().__init__(*args, **kwargs)
602
603 def decode(self, s):
604 if self.transform_source:
605 s = self.transform_source(s)
606 if self.ignore_extra:
607 return self.raw_decode(s.lstrip())[0]
608 return super().decode(s)
609
610
611 def sanitize_open(filename, open_mode):
612 """Try to open the given filename, and slightly tweak it if this fails.
613
614 Attempts to open the given filename. If this fails, it tries to change
615 the filename slightly, step by step, until it's either able to open it
616 or it fails and raises a final exception, like the standard open()
617 function.
618
619 It returns the tuple (stream, definitive_file_name).
620 """
621 if filename == '-':
622 if sys.platform == 'win32':
623 import msvcrt
624 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
625 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
626
627 for attempt in range(2):
628 try:
629 try:
630 if sys.platform == 'win32':
631 # FIXME: An exclusive lock also locks the file from being read.
632 # Since windows locks are mandatory, don't lock the file on windows (for now).
633 # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
634 raise LockingUnsupportedError()
635 stream = locked_file(filename, open_mode, block=False).__enter__()
636 except OSError:
637 stream = open(filename, open_mode)
638 return stream, filename
639 except OSError as err:
640 if attempt or err.errno in (errno.EACCES,):
641 raise
642 old_filename, filename = filename, sanitize_path(filename)
643 if old_filename == filename:
644 raise
645
646
647 def timeconvert(timestr):
648 """Convert RFC 2822 defined time string into system timestamp"""
649 timestamp = None
650 timetuple = email.utils.parsedate_tz(timestr)
651 if timetuple is not None:
652 timestamp = email.utils.mktime_tz(timetuple)
653 return timestamp
654
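# Example: RFC 2822 dates carry an explicit UTC offset, so the result is an
# absolute epoch timestamp.
#
#   >>> timeconvert('Thu, 01 Jan 1970 00:00:00 +0000')
#   0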
655
656 def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
657 """Sanitizes a string so it could be used as part of a filename.
658 @param restricted Use a stricter subset of allowed characters
659 @param is_id Whether this is an ID that should be kept unchanged if possible.
660 If unset, yt-dlp's new sanitization rules are in effect
661 """
662 if s == '':
663 return ''
664
665 def replace_insane(char):
666 if restricted and char in ACCENT_CHARS:
667 return ACCENT_CHARS[char]
668 elif not restricted and char == '\n':
669 return '\0 '
670 elif char == '?' or ord(char) < 32 or ord(char) == 127:
671 return ''
672 elif char == '"':
673 return '' if restricted else '\''
674 elif char == ':':
675 return '\0_\0-' if restricted else '\0 \0-'
676 elif char in '\\/|*<>':
677 return '\0_'
678 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
679 return '\0_'
680 return char
681
682 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
683 result = ''.join(map(replace_insane, s))
684 if is_id is NO_DEFAULT:
685 result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
686 STRIP_RE = '(?:\0.|[ _-])*'
687 result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
688 result = result.replace('\0', '') or '_'
689
690 if not is_id:
691 while '__' in result:
692 result = result.replace('__', '_')
693 result = result.strip('_')
694 # Common case of "Foreign band name - English song title"
695 if restricted and result.startswith('-_'):
696 result = result[2:]
697 if result.startswith('-'):
698 result = '_' + result[len('-'):]
699 result = result.lstrip('.')
700 if not result:
701 result = '_'
702 return result
703
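# Example, traced against the substitution rules above: in restricted mode
# ': ' collapses to '_-_' and '?' is dropped.
#
#   >>> sanitize_filename('yt-dlp: test?', restricted=True)
#   'yt-dlp_-_test'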
704
705 def sanitize_path(s, force=False):
706 """Sanitizes and normalizes path on Windows"""
707 if sys.platform == 'win32':
708 force = False
709 drive_or_unc, _ = os.path.splitdrive(s)
710 elif force:
711 drive_or_unc = ''
712 else:
713 return s
714
715 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
716 if drive_or_unc:
717 norm_path.pop(0)
718 sanitized_path = [
719 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
720 for path_part in norm_path]
721 if drive_or_unc:
722 sanitized_path.insert(0, drive_or_unc + os.path.sep)
723 elif force and s and s[0] == os.path.sep:
724 sanitized_path.insert(0, os.path.sep)
725 return os.path.join(*sanitized_path)
726
727
728 def sanitize_url(url):
729 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
730 # the number of unwanted failures due to missing protocol
731 if url is None:
732 return
733 elif url.startswith('//'):
734 return 'http:%s' % url
735 # Fix some common typos seen so far
736 COMMON_TYPOS = (
737 # https://github.com/ytdl-org/youtube-dl/issues/15649
738 (r'^httpss://', r'https://'),
739 # https://bx1.be/lives/direct-tv/
740 (r'^rmtp([es]?)://', r'rtmp\1://'),
741 )
742 for mistake, fixup in COMMON_TYPOS:
743 if re.match(mistake, url):
744 return re.sub(mistake, fixup, url)
745 return url
746
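# Examples of the fixups above:
#
#   >>> sanitize_url('//example.com/video')
#   'http://example.com/video'
#   >>> sanitize_url('httpss://example.com/video')
#   'https://example.com/video'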
747
748 def extract_basic_auth(url):
749 parts = compat_urlparse.urlsplit(url)
750 if parts.username is None:
751 return url, None
752 url = compat_urlparse.urlunsplit(parts._replace(netloc=(
753 parts.hostname if parts.port is None
754 else '%s:%d' % (parts.hostname, parts.port))))
755 auth_payload = base64.b64encode(
756 ('%s:%s' % (parts.username, parts.password or '')).encode())
757 return url, f'Basic {auth_payload.decode()}'
758
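# Example: credentials are moved out of the netloc and into a header value.
#
#   >>> extract_basic_auth('http://user:pass@example.com/path')
#   ('http://example.com/path', 'Basic dXNlcjpwYXNz')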
759
760 def sanitized_Request(url, *args, **kwargs):
761 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
762 if auth_header is not None:
763 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
764 headers['Authorization'] = auth_header
765 return compat_urllib_request.Request(url, *args, **kwargs)
766
767
768 def expand_path(s):
769 """Expand shell variables and ~"""
770 return os.path.expandvars(compat_expanduser(s))
771
772
773 def orderedSet(iterable, *, lazy=False):
774 """Remove all duplicates from the input iterable"""
775 def _iter():
776 seen = [] # Do not use set since the items can be unhashable
777 for x in iterable:
778 if x not in seen:
779 seen.append(x)
780 yield x
781
782 return _iter() if lazy else list(_iter())
783
784
785 def _htmlentity_transform(entity_with_semicolon):
786 """Transforms an HTML entity to a character."""
787 entity = entity_with_semicolon[:-1]
788
789 # Known non-numeric HTML entity
790 if entity in compat_html_entities.name2codepoint:
791 return compat_chr(compat_html_entities.name2codepoint[entity])
792
793 # TODO: HTML5 allows entities without a semicolon. For example,
794 # '&Eacuteric' should be decoded as 'Éric'.
795 if entity_with_semicolon in compat_html_entities_html5:
796 return compat_html_entities_html5[entity_with_semicolon]
797
798 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
799 if mobj is not None:
800 numstr = mobj.group(1)
801 if numstr.startswith('x'):
802 base = 16
803 numstr = '0%s' % numstr
804 else:
805 base = 10
806 # See https://github.com/ytdl-org/youtube-dl/issues/7518
807 with contextlib.suppress(ValueError):
808 return compat_chr(int(numstr, base))
809
810 # Unknown entity in name, return its literal representation
811 return '&%s;' % entity
812
813
814 def unescapeHTML(s):
815 if s is None:
816 return None
817 assert isinstance(s, str)
818
819 return re.sub(
820 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
821
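# Example: named, decimal and hexadecimal entities are all handled;
# unknown entities are left as-is.
#
#   >>> unescapeHTML('&amp; &#38; &#x26; &fakeentity;')
#   '& & & &fakeentity;'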
822
823 def escapeHTML(text):
824 return (
825 text
826 .replace('&', '&amp;')
827 .replace('<', '&lt;')
828 .replace('>', '&gt;')
829 .replace('"', '&quot;')
830 .replace("'", '&#39;')
831 )
832
833
834 def process_communicate_or_kill(p, *args, **kwargs):
835 write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
836 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
837 return Popen.communicate_or_kill(p, *args, **kwargs)
838
839
840 class Popen(subprocess.Popen):
841 if sys.platform == 'win32':
842 _startupinfo = subprocess.STARTUPINFO()
843 _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
844 else:
845 _startupinfo = None
846
847 def __init__(self, *args, text=False, **kwargs):
848 if text is True:
849 kwargs['universal_newlines'] = True # For 3.6 compatibility
850 kwargs.setdefault('encoding', 'utf-8')
851 kwargs.setdefault('errors', 'replace')
852 super().__init__(*args, **kwargs, startupinfo=self._startupinfo)
853
854 def communicate_or_kill(self, *args, **kwargs):
855 try:
856 return self.communicate(*args, **kwargs)
857 except BaseException: # Including KeyboardInterrupt
858 self.kill(timeout=None)
859 raise
860
861 def kill(self, *, timeout=0):
862 super().kill()
863 if timeout != 0:
864 self.wait(timeout=timeout)
865
866 @classmethod
867 def run(cls, *args, **kwargs):
868 with cls(*args, **kwargs) as proc:
869 stdout, stderr = proc.communicate_or_kill()
870 return stdout or '', stderr or '', proc.returncode
871
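# Illustrative use of Popen.run (assumes a POSIX `echo` binary; any argv
# works the same way):
#
#   >>> stdout, stderr, retcode = Popen.run(
#   ...     ['echo', 'hi'], text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#   >>> stdout.strip(), retcode
#   ('hi', 0)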
872
873 def get_subprocess_encoding():
874 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
875 # For subprocess calls, encode with locale encoding
876 # Refer to http://stackoverflow.com/a/9951851/35070
877 encoding = preferredencoding()
878 else:
879 encoding = sys.getfilesystemencoding()
880 if encoding is None:
881 encoding = 'utf-8'
882 return encoding
883
884
885 def encodeFilename(s, for_subprocess=False):
886 assert isinstance(s, str)
887 return s
888
889
890 def decodeFilename(b, for_subprocess=False):
891 return b
892
893
894 def encodeArgument(s):
895 # Legacy code that uses byte strings
896 # Uncomment the following line after fixing all post processors
897 # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
898 return s if isinstance(s, str) else s.decode('ascii')
899
900
901 def decodeArgument(b):
902 return b
903
904
905 def decodeOption(optval):
906 if optval is None:
907 return optval
908 if isinstance(optval, bytes):
909 optval = optval.decode(preferredencoding())
910
911 assert isinstance(optval, compat_str)
912 return optval
913
914
915 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
916
917
918 def timetuple_from_msec(msec):
919 secs, msec = divmod(msec, 1000)
920 mins, secs = divmod(secs, 60)
921 hrs, mins = divmod(mins, 60)
922 return _timetuple(hrs, mins, secs, msec)
923
924
925 def formatSeconds(secs, delim=':', msec=False):
926 time = timetuple_from_msec(secs * 1000)
927 if time.hours:
928 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
929 elif time.minutes:
930 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
931 else:
932 ret = '%d' % time.seconds
933 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
934
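# Example: 3661.5 seconds is 1h 1m 1.5s.
#
#   >>> formatSeconds(3661.5, msec=True)
#   '1:01:01.500'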
935
936 def _ssl_load_windows_store_certs(ssl_context, storename):
937 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
938 try:
939 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
940 if encoding == 'x509_asn' and (
941 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
942 except PermissionError:
943 return
944 for cert in certs:
945 with contextlib.suppress(ssl.SSLError):
946 ssl_context.load_verify_locations(cadata=cert)
947
948
949 def make_HTTPS_handler(params, **kwargs):
950 opts_check_certificate = not params.get('nocheckcertificate')
951 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
952 context.check_hostname = opts_check_certificate
953 if params.get('legacyserverconnect'):
954 context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
955 # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
956 context.set_ciphers('DEFAULT')
957
958 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
959 if opts_check_certificate:
960 if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
961 context.load_verify_locations(cafile=certifi.where())
962 try:
963 context.load_default_certs()
964 # Work around the issue in load_default_certs when there are bad certificates. See:
965 # https://github.com/yt-dlp/yt-dlp/issues/1060,
966 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
967 except ssl.SSLError:
968 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
969 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
970 for storename in ('CA', 'ROOT'):
971 _ssl_load_windows_store_certs(context, storename)
972 context.set_default_verify_paths()
973
974 client_certfile = params.get('client_certificate')
975 if client_certfile:
976 try:
977 context.load_cert_chain(
978 client_certfile, keyfile=params.get('client_certificate_key'),
979 password=params.get('client_certificate_password'))
980 except ssl.SSLError:
981 raise YoutubeDLError('Unable to load client certificate')
982
983 # Some servers may reject requests if ALPN extension is not sent. See:
984 # https://github.com/python/cpython/issues/85140
985 # https://github.com/yt-dlp/yt-dlp/issues/3878
986 with contextlib.suppress(NotImplementedError):
987 context.set_alpn_protocols(['http/1.1'])
988
989 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
990
991
992 def bug_reports_message(before=';'):
993 msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
994 'filling out the appropriate issue template. '
995 'Confirm you are on the latest version using yt-dlp -U')
996
997 before = before.rstrip()
998 if not before or before.endswith(('.', '!', '?')):
999 msg = msg[0].title() + msg[1:]
1000
1001 return (before + ' ' if before else '') + msg
1002
1003
1004 class YoutubeDLError(Exception):
1005 """Base exception for YoutubeDL errors."""
1006 msg = None
1007
1008 def __init__(self, msg=None):
1009 if msg is not None:
1010 self.msg = msg
1011 elif self.msg is None:
1012 self.msg = type(self).__name__
1013 super().__init__(self.msg)
1014
1015
1016 network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
1017 if hasattr(ssl, 'CertificateError'):
1018 network_exceptions.append(ssl.CertificateError)
1019 network_exceptions = tuple(network_exceptions)
1020
1021
1022 class ExtractorError(YoutubeDLError):
1023 """Error during info extraction."""
1024
1025 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
1026 """ tb, if given, is the original traceback (so that it can be printed out).
1027 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1028 """
1029 if sys.exc_info()[0] in network_exceptions:
1030 expected = True
1031
1032 self.orig_msg = str(msg)
1033 self.traceback = tb
1034 self.expected = expected
1035 self.cause = cause
1036 self.video_id = video_id
1037 self.ie = ie
1038 self.exc_info = sys.exc_info() # preserve original exception
1039
1040 super().__init__(''.join((
1041 format_field(ie, None, '[%s] '),
1042 format_field(video_id, None, '%s: '),
1043 msg,
1044 format_field(cause, None, ' (caused by %r)'),
1045 '' if expected else bug_reports_message())))
1046
1047 def format_traceback(self):
1048 return join_nonempty(
1049 self.traceback and ''.join(traceback.format_tb(self.traceback)),
1050 self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1051 delim='\n') or None
1052
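# Example of how the message is assembled by the positional format_field()
# calls above (extractor/video values are hypothetical):
#
#   >>> str(ExtractorError('Unable to extract', video_id='abc123', ie='Youtube', expected=True))
#   '[Youtube] abc123: Unable to extract'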
1053
1054 class UnsupportedError(ExtractorError):
1055 def __init__(self, url):
1056 super().__init__(
1057 'Unsupported URL: %s' % url, expected=True)
1058 self.url = url
1059
1060
1061 class RegexNotFoundError(ExtractorError):
1062 """Error when a regex didn't match"""
1063 pass
1064
1065
1066 class GeoRestrictedError(ExtractorError):
1067 """Geographic restriction Error exception.
1068
1069 This exception may be thrown when a video is not available from your
1070 geographic location due to geographic restrictions imposed by a website.
1071 """
1072
1073 def __init__(self, msg, countries=None, **kwargs):
1074 kwargs['expected'] = True
1075 super().__init__(msg, **kwargs)
1076 self.countries = countries
1077
1078
1079 class DownloadError(YoutubeDLError):
1080 """Download Error exception.
1081
1082 This exception may be thrown by FileDownloader objects if they are not
1083 configured to continue on errors. They will contain the appropriate
1084 error message.
1085 """
1086
1087 def __init__(self, msg, exc_info=None):
1088 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1089 super().__init__(msg)
1090 self.exc_info = exc_info
1091
1092
1093 class EntryNotInPlaylist(YoutubeDLError):
1094 """Entry not in playlist exception.
1095
1096 This exception will be thrown by YoutubeDL when a requested entry
1097 is not found in the playlist info_dict
1098 """
1099 msg = 'Entry not found in info'
1100
1101
1102 class SameFileError(YoutubeDLError):
1103 """Same File exception.
1104
1105 This exception will be thrown by FileDownloader objects if they detect
1106 multiple files would have to be downloaded to the same file on disk.
1107 """
1108 msg = 'Fixed output name but more than one file to download'
1109
1110 def __init__(self, filename=None):
1111 if filename is not None:
1112 self.msg += f': {filename}'
1113 super().__init__(self.msg)
1114
1115
1116 class PostProcessingError(YoutubeDLError):
1117 """Post Processing exception.
1118
1119 This exception may be raised by PostProcessor's .run() method to
1120 indicate an error in the postprocessing task.
1121 """
1122
1123
1124 class DownloadCancelled(YoutubeDLError):
1125 """ Exception raised when the download queue should be interrupted """
1126 msg = 'The download was cancelled'
1127
1128
1129 class ExistingVideoReached(DownloadCancelled):
1130 """ --break-on-existing triggered """
1131 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1132
1133
1134 class RejectedVideoReached(DownloadCancelled):
1135 """ --break-on-reject triggered """
1136 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1137
1138
1139 class MaxDownloadsReached(DownloadCancelled):
1140 """ --max-downloads limit has been reached. """
1141 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1142
1143
1144 class ReExtractInfo(YoutubeDLError):
1145 """ Video info needs to be re-extracted. """
1146
1147 def __init__(self, msg, expected=False):
1148 super().__init__(msg)
1149 self.expected = expected
1150
1151
1152 class ThrottledDownload(ReExtractInfo):
1153 """ Download speed below --throttled-rate. """
1154 msg = 'The download speed is below throttle limit'
1155
1156 def __init__(self):
1157 super().__init__(self.msg, expected=False)
1158
1159
1160 class UnavailableVideoError(YoutubeDLError):
1161 """Unavailable Format exception.
1162
1163 This exception will be thrown when a video is requested
1164 in a format that is not available for that video.
1165 """
1166 msg = 'Unable to download video'
1167
1168 def __init__(self, err=None):
1169 if err is not None:
1170 self.msg += f': {err}'
1171 super().__init__(self.msg)
1172
1173
1174 class ContentTooShortError(YoutubeDLError):
1175 """Content Too Short exception.
1176
1177 This exception may be raised by FileDownloader objects when a file they
1178 download is too small for what the server announced first, indicating
1179 the connection was probably interrupted.
1180 """
1181
1182 def __init__(self, downloaded, expected):
1183 super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1184 # Both in bytes
1185 self.downloaded = downloaded
1186 self.expected = expected
1187
1188
1189 class XAttrMetadataError(YoutubeDLError):
1190 def __init__(self, code=None, msg='Unknown error'):
1191 super().__init__(msg)
1192 self.code = code
1193 self.msg = msg
1194
1195 # Parsing code and msg
1196 if (self.code in (errno.ENOSPC, errno.EDQUOT)
1197 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
1198 self.reason = 'NO_SPACE'
1199 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
1200 self.reason = 'VALUE_TOO_LONG'
1201 else:
1202 self.reason = 'NOT_SUPPORTED'
1203
1204
1205 class XAttrUnavailableError(YoutubeDLError):
1206 pass
1207
1208
1209 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1210 hc = http_class(*args, **kwargs)
1211 source_address = ydl_handler._params.get('source_address')
1212
1213 if source_address is not None:
1214 # This is to workaround _create_connection() from socket where it will try all
1215 # address data from getaddrinfo() including IPv6. This filters the result from
1216 # getaddrinfo() based on the source_address value.
1217 # This is based on the cpython socket.create_connection() function.
1218 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1219 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1220 host, port = address
1221 err = None
1222 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1223 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1224 ip_addrs = [addr for addr in addrs if addr[0] == af]
1225 if addrs and not ip_addrs:
1226 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1227 raise OSError(
1228 "No remote IP%s addresses available for connect, can't use '%s' as source address"
1229 % (ip_version, source_address[0]))
1230 for res in ip_addrs:
1231 af, socktype, proto, canonname, sa = res
1232 sock = None
1233 try:
1234 sock = socket.socket(af, socktype, proto)
1235 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1236 sock.settimeout(timeout)
1237 sock.bind(source_address)
1238 sock.connect(sa)
1239 err = None # Explicitly break reference cycle
1240 return sock
1241 except OSError as _:
1242 err = _
1243 if sock is not None:
1244 sock.close()
1245 if err is not None:
1246 raise err
1247 else:
1248 raise OSError('getaddrinfo returns an empty list')
1249 if hasattr(hc, '_create_connection'):
1250 hc._create_connection = _create_connection
1251 hc.source_address = (source_address, 0)
1252
1253 return hc
1254
1255
1256 def handle_youtubedl_headers(headers):
1257 filtered_headers = headers
1258
1259 if 'Youtubedl-no-compression' in filtered_headers:
1260 filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
1261 del filtered_headers['Youtubedl-no-compression']
1262
1263 return filtered_headers
1264
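# Example: the internal marker header suppresses compression and is itself
# stripped before the request is sent.
#
#   >>> handle_youtubedl_headers(
#   ...     {'Youtubedl-no-compression': '1', 'Accept-Encoding': 'gzip', 'User-Agent': 'UA'})
#   {'User-Agent': 'UA'}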
1265
1266 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
1267 """Handler for HTTP requests and responses.
1268
1269 This class, when installed with an OpenerDirector, automatically adds
1270 the standard headers to every HTTP request and handles gzipped and
1271 deflated responses from web servers. If compression is to be avoided in
1272 a particular request, the original request in the program code only has
1273 to include the HTTP header "Youtubedl-no-compression", which will be
1274 removed before making the real request.
1275
1276 Part of this code was copied from:
1277
1278 http://techknack.net/python-urllib2-handlers/
1279
1280 Andrew Rowls, the author of that code, agreed to release it to the
1281 public domain.
1282 """
1283
1284 def __init__(self, params, *args, **kwargs):
1285 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
1286 self._params = params
1287
1288 def http_open(self, req):
1289 conn_class = compat_http_client.HTTPConnection
1290
1291 socks_proxy = req.headers.get('Ytdl-socks-proxy')
1292 if socks_proxy:
1293 conn_class = make_socks_conn_class(conn_class, socks_proxy)
1294 del req.headers['Ytdl-socks-proxy']
1295
1296 return self.do_open(functools.partial(
1297 _create_http_connection, self, conn_class, False),
1298 req)
1299
1300 @staticmethod
1301 def deflate(data):
1302 if not data:
1303 return data
1304 try:
1305 return zlib.decompress(data, -zlib.MAX_WBITS)
1306 except zlib.error:
1307 return zlib.decompress(data)
1308
1309 @staticmethod
1310 def brotli(data):
1311 if not data:
1312 return data
1313 return brotli.decompress(data)
1314
1315 def http_request(self, req):
1316 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
1317 # always respected by websites, and some give out URLs with non percent-encoded
1318 # non-ASCII characters (see telemb.py, ard.py [#3412])
1319 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1320 # To work around aforementioned issue we will replace request's original URL with
1321 # percent-encoded one
1322 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1323 # the code of this workaround has been moved here from YoutubeDL.urlopen()
1324 url = req.get_full_url()
1325 url_escaped = escape_url(url)
1326
1327 # Substitute URL if any change after escaping
1328 if url != url_escaped:
1329 req = update_Request(req, url=url_escaped)
1330
1331 for h, v in self._params.get('http_headers', std_headers).items():
1332 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
1333 # The dict keys are capitalized because of this bug by urllib
1334 if h.capitalize() not in req.headers:
1335 req.add_header(h, v)
1336
1337 if 'Accept-encoding' not in req.headers:
1338 req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
1339
1340 req.headers = handle_youtubedl_headers(req.headers)
1341
1342 return req
1343
1344 def http_response(self, req, resp):
1345 old_resp = resp
1346 # gzip
1347 if resp.headers.get('Content-encoding', '') == 'gzip':
1348 content = resp.read()
1349 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
1350 try:
1351 uncompressed = io.BytesIO(gz.read())
1352 except OSError as original_ioerror:
1353 # There may be junk at the end of the file
1354 # See http://stackoverflow.com/q/4928560/35070 for details
1355 for i in range(1, 1024):
1356 try:
1357 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
1358 uncompressed = io.BytesIO(gz.read())
1359 except OSError:
1360 continue
1361 break
1362 else:
1363 raise original_ioerror
1364 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
1365 resp.msg = old_resp.msg
1366 del resp.headers['Content-encoding']
1367 # deflate
1368 if resp.headers.get('Content-encoding', '') == 'deflate':
1369 gz = io.BytesIO(self.deflate(resp.read()))
1370 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
1371 resp.msg = old_resp.msg
1372 del resp.headers['Content-encoding']
1373 # brotli
1374 if resp.headers.get('Content-encoding', '') == 'br':
1375 resp = compat_urllib_request.addinfourl(
1376 io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
1377 resp.msg = old_resp.msg
1378 del resp.headers['Content-encoding']
1379 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1380 # https://github.com/ytdl-org/youtube-dl/issues/6457).
1381 if 300 <= resp.code < 400:
1382 location = resp.headers.get('Location')
1383 if location:
1384 # Per RFC 2616, the default charset is iso-8859-1, which Python 3 respects
1385 location = location.encode('iso-8859-1').decode()
1386 location_escaped = escape_url(location)
1387 if location != location_escaped:
1388 del resp.headers['Location']
1389 resp.headers['Location'] = location_escaped
1390 return resp
1391
1392 https_request = http_request
1393 https_response = http_response
1394
1395
1396 def make_socks_conn_class(base_class, socks_proxy):
1397 assert issubclass(base_class, (
1398 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
1399
1400 url_components = compat_urlparse.urlparse(socks_proxy)
1401 if url_components.scheme.lower() == 'socks5':
1402 socks_type = ProxyType.SOCKS5
1403 elif url_components.scheme.lower() in ('socks', 'socks4'):
1404 socks_type = ProxyType.SOCKS4
1405 elif url_components.scheme.lower() == 'socks4a':
1406 socks_type = ProxyType.SOCKS4A
1407
1408 def unquote_if_non_empty(s):
1409 if not s:
1410 return s
1411 return compat_urllib_parse_unquote_plus(s)
1412
1413 proxy_args = (
1414 socks_type,
1415 url_components.hostname, url_components.port or 1080,
1416 True, # Remote DNS
1417 unquote_if_non_empty(url_components.username),
1418 unquote_if_non_empty(url_components.password),
1419 )
1420
1421 class SocksConnection(base_class):
1422 def connect(self):
1423 self.sock = sockssocket()
1424 self.sock.setproxy(*proxy_args)
1425 if isinstance(self.timeout, (int, float)):
1426 self.sock.settimeout(self.timeout)
1427 self.sock.connect((self.host, self.port))
1428
1429 if isinstance(self, compat_http_client.HTTPSConnection):
1430 if hasattr(self, '_context'): # Python > 2.6
1431 self.sock = self._context.wrap_socket(
1432 self.sock, server_hostname=self.host)
1433 else:
1434 self.sock = ssl.wrap_socket(self.sock)
1435
1436 return SocksConnection
1437
1438
1439 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
1440 def __init__(self, params, https_conn_class=None, *args, **kwargs):
1441 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
1442 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
1443 self._params = params
1444
1445 def https_open(self, req):
1446 kwargs = {}
1447 conn_class = self._https_conn_class
1448
1449 if hasattr(self, '_context'): # python > 2.6
1450 kwargs['context'] = self._context
1451 if hasattr(self, '_check_hostname'): # python 3.x
1452 kwargs['check_hostname'] = self._check_hostname
1453
1454 socks_proxy = req.headers.get('Ytdl-socks-proxy')
1455 if socks_proxy:
1456 conn_class = make_socks_conn_class(conn_class, socks_proxy)
1457 del req.headers['Ytdl-socks-proxy']
1458
1459 try:
1460 return self.do_open(
1461 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
1462 except urllib.error.URLError as e:
1463 if (isinstance(e.reason, ssl.SSLError)
1464 and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1465 raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
1466 raise
1467
1468
1469 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
1470 """
1471 See [1] for cookie file format.
1472
1473 1. https://curl.haxx.se/docs/http-cookies.html
1474 """
1475 _HTTPONLY_PREFIX = '#HttpOnly_'
1476 _ENTRY_LEN = 7
1477 _HEADER = '''# Netscape HTTP Cookie File
1478 # This file is generated by yt-dlp. Do not edit.
1479
1480 '''
1481 _CookieFileEntry = collections.namedtuple(
1482 'CookieFileEntry',
1483 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1484
1485 def __init__(self, filename=None, *args, **kwargs):
1486 super().__init__(None, *args, **kwargs)
1487 if self.is_path(filename):
1488 filename = os.fspath(filename)
1489 self.filename = filename
1490
1491 @staticmethod
1492 def _true_or_false(cndn):
1493 return 'TRUE' if cndn else 'FALSE'
1494
1495 @staticmethod
1496 def is_path(file):
1497 return isinstance(file, (str, bytes, os.PathLike))
1498
1499 @contextlib.contextmanager
1500 def open(self, file, *, write=False):
1501 if self.is_path(file):
1502 with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1503 yield f
1504 else:
1505 if write:
1506 file.truncate(0)
1507 yield file
1508
1509 def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1510 now = time.time()
1511 for cookie in self:
1512 if (not ignore_discard and cookie.discard
1513 or not ignore_expires and cookie.is_expired(now)):
1514 continue
1515 name, value = cookie.name, cookie.value
1516 if value is None:
1517 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1518 # with no name, whereas http.cookiejar regards it as a
1519 # cookie with no value.
1520 name, value = '', name
1521 f.write('%s\n' % '\t'.join((
1522 cookie.domain,
1523 self._true_or_false(cookie.domain.startswith('.')),
1524 cookie.path,
1525 self._true_or_false(cookie.secure),
1526 str_or_none(cookie.expires, default=''),
1527 name, value
1528 )))
1529
1530 def save(self, filename=None, *args, **kwargs):
1531 """
1532 Save cookies to a file.
1533 Code is taken from CPython 3.6
1534 https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1535
1536 if filename is None:
1537 if self.filename is not None:
1538 filename = self.filename
1539 else:
1540 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
1541
1542 # Store session cookies with `expires` set to 0 instead of an empty string
1543 for cookie in self:
1544 if cookie.expires is None:
1545 cookie.expires = 0
1546
1547 with self.open(filename, write=True) as f:
1548 f.write(self._HEADER)
1549 self._really_save(f, *args, **kwargs)
1550
1551 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1552 """Load cookies from a file."""
1553 if filename is None:
1554 if self.filename is not None:
1555 filename = self.filename
1556 else:
1557 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
1558
1559 def prepare_line(line):
1560 if line.startswith(self._HTTPONLY_PREFIX):
1561 line = line[len(self._HTTPONLY_PREFIX):]
1562 # comments and empty lines are fine
1563 if line.startswith('#') or not line.strip():
1564 return line
1565 cookie_list = line.split('\t')
1566 if len(cookie_list) != self._ENTRY_LEN:
1567 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
1568 cookie = self._CookieFileEntry(*cookie_list)
1569 if cookie.expires_at and not cookie.expires_at.isdigit():
1570 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1571 return line
1572
1573 cf = io.StringIO()
1574 with self.open(filename) as f:
1575 for line in f:
1576 try:
1577 cf.write(prepare_line(line))
1578 except compat_cookiejar.LoadError as e:
1579 if f'{line.strip()} '[0] in '[{"':
1580 raise compat_cookiejar.LoadError(
1581 'Cookies file must be Netscape formatted, not JSON. See '
1582 'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
1583 write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1584 continue
1585 cf.seek(0)
1586 self._really_load(cf, filename, ignore_discard, ignore_expires)
1587 # Session cookies are denoted by either the `expires` field set to
1588 # an empty string or 0. MozillaCookieJar only recognizes the former
1589 # (see [1]), so we need to force the latter to be recognized as
1590 # session cookies on our own.
1591 # Session cookies may be important for cookie-based authentication:
1592 # e.g. when a user does not tick the 'Remember me' checkbox while
1593 # logging in on a site, important cookies are often stored as session
1594 # cookies, and failing to recognize them results in a failed login.
1595 # 1. https://bugs.python.org/issue17164
1596 for cookie in self:
1597 # Treat `expires=0` cookies as session cookies
1598 if cookie.expires == 0:
1599 cookie.expires = None
1600 cookie.discard = True
1601
1602
1603 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
1604 def __init__(self, cookiejar=None):
1605 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
1606
1607 def http_response(self, request, response):
1608 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
1609
1610 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
1611 https_response = http_response
1612
1613
1614 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
1615 """YoutubeDL redirect handler
1616
1617 The code is based on HTTPRedirectHandler implementation from CPython [1].
1618
1619 This redirect handler solves two issues:
1620 - ensures redirect URL is always unicode under python 2
1621 - introduces support for experimental HTTP response status code
1622 308 Permanent Redirect [2] used by some sites [3]
1623
1624 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1625 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1626 3. https://github.com/ytdl-org/youtube-dl/issues/28768
1627 """
1628
1629 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
1630
1631 def redirect_request(self, req, fp, code, msg, headers, newurl):
1632 """Return a Request or None in response to a redirect.
1633
1634 This is called by the http_error_30x methods when a
1635 redirection response is received. If a redirection should
1636 take place, return a new Request to allow http_error_30x to
1637 perform the redirect. Otherwise, raise HTTPError if no-one
1638 else should try to handle this url. Return None if you can't
1639 but another Handler might.
1640 """
1641 m = req.get_method()
1642 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1643 or code in (301, 302, 303) and m == "POST")):
1644 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
1645 # Strictly (according to RFC 2616), 301 or 302 in response to
1646 # a POST MUST NOT cause a redirection without confirmation
1647 # from the user (of urllib.request, in this case). In practice,
1648 # essentially all clients do redirect in this case, so we do
1649 # the same.
1650
1651 # Be conciliant with URIs containing a space. This is mainly
1652 # redundant with the more complete encoding done in http_error_302(),
1653 # but it is kept for compatibility with other callers.
1654 newurl = newurl.replace(' ', '%20')
1655
1656 CONTENT_HEADERS = ("content-length", "content-type")
1658 newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1659
1660 # A 303 must either use GET or HEAD for subsequent request
1661 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1662 if code == 303 and m != 'HEAD':
1663 m = 'GET'
1664 # 301 and 302 redirects are commonly turned into a GET from a POST
1665 # for subsequent requests by browsers, so we'll do the same.
1666 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1667 # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1668 if code in (301, 302) and m == 'POST':
1669 m = 'GET'
1670
1671 return compat_urllib_request.Request(
1672 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1673 unverifiable=True, method=m)
1674
1675
1676 def extract_timezone(date_str):
1677 m = re.search(
1678 r'''(?x)
1679 ^.{8,}? # >=8 char non-TZ prefix, if present
1680 (?P<tz>Z| # just the UTC Z, or
1681 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
1682 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1683 [ ]? # optional space
1684 (?P<sign>\+|-) # +/-
1685 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
1686 $)
1687 ''', date_str)
1688 if not m:
1689 timezone = datetime.timedelta()
1690 else:
1691 date_str = date_str[:-len(m.group('tz'))]
1692 if not m.group('sign'):
1693 timezone = datetime.timedelta()
1694 else:
1695 sign = 1 if m.group('sign') == '+' else -1
1696 timezone = datetime.timedelta(
1697 hours=sign * int(m.group('hours')),
1698 minutes=sign * int(m.group('minutes')))
1699 return timezone, date_str
1700
1701
1702 def parse_iso8601(date_str, delimiter='T', timezone=None):
1703 """ Return a UNIX timestamp from the given date """
1704
1705 if date_str is None:
1706 return None
1707
1708 date_str = re.sub(r'\.[0-9]+', '', date_str)
1709
1710 if timezone is None:
1711 timezone, date_str = extract_timezone(date_str)
1712
1713 with contextlib.suppress(ValueError):
1714 date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1715 dt = datetime.datetime.strptime(date_str, date_format) - timezone
1716 return calendar.timegm(dt.timetuple())
1717
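# Example (editor's illustrative sketch, not part of the upstream file;
# assumes a well-formed ISO 8601 input string):
#   >>> parse_iso8601('2014-01-01T00:00:00+0000')
#   1388534400
#   >>> extract_timezone('2014-01-01T00:00:00+0530')[0]
#   datetime.timedelta(seconds=19800)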
1718
1719 def date_formats(day_first=True):
1720 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1721
1722
1723 def unified_strdate(date_str, day_first=True):
1724 """Return a string with the date in the format YYYYMMDD"""
1725
1726 if date_str is None:
1727 return None
1728 upload_date = None
1729 # Replace commas
1730 date_str = date_str.replace(',', ' ')
1731 # Remove AM/PM + timezone
1732 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1733 _, date_str = extract_timezone(date_str)
1734
1735 for expression in date_formats(day_first):
1736 with contextlib.suppress(ValueError):
1737 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1738 if upload_date is None:
1739 timetuple = email.utils.parsedate_tz(date_str)
1740 if timetuple:
1741 with contextlib.suppress(ValueError):
1742 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1743 if upload_date is not None:
1744 return compat_str(upload_date)
1745
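# Example (editor's illustrative sketch, not part of the upstream file;
# relies on the DATE_FORMATS tables defined earlier in this module):
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('1968 12 10')
#   '19681210'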
1746
1747 def unified_timestamp(date_str, day_first=True):
1748 if date_str is None:
1749 return None
1750
1751 date_str = re.sub(r'[,|]', '', date_str)
1752
1753 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1754 timezone, date_str = extract_timezone(date_str)
1755
1756 # Remove AM/PM + timezone
1757 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1758
1759 # Remove unrecognized timezones from ISO 8601 alike timestamps
1760 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1761 if m:
1762 date_str = date_str[:-len(m.group('tz'))]
1763
1764 # Python only supports microseconds, so remove nanoseconds
1765 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1766 if m:
1767 date_str = m.group(1)
1768
1769 for expression in date_formats(day_first):
1770 with contextlib.suppress(ValueError):
1771 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1772 return calendar.timegm(dt.timetuple())
1773 timetuple = email.utils.parsedate_tz(date_str)
1774 if timetuple:
1775 return calendar.timegm(timetuple) + pm_delta * 3600
1776
1777
1778 def determine_ext(url, default_ext='unknown_video'):
1779 if url is None or '.' not in url:
1780 return default_ext
1781 guess = url.partition('?')[0].rpartition('.')[2]
1782 if re.match(r'^[A-Za-z0-9]+$', guess):
1783 return guess
1784 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1785 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1786 return guess.rstrip('/')
1787 else:
1788 return default_ext
1789
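# Example (editor's illustrative sketch, not part of the upstream file):
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'
#   >>> determine_ext('http://example.com/page')
#   'unknown_video'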
1790
1791 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1792 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1793
1794
1795 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1796 R"""
1797 Return a datetime object from a string.
1798 Supported format:
1799 (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1800
1801 @param format strftime format of DATE
1802 @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
1803 auto: round to the unit provided in date_str (if applicable).
1804 """
1805 auto_precision = False
1806 if precision == 'auto':
1807 auto_precision = True
1808 precision = 'microsecond'
1809 today = datetime_round(datetime.datetime.utcnow(), precision)
1810 if date_str in ('now', 'today'):
1811 return today
1812 if date_str == 'yesterday':
1813 return today - datetime.timedelta(days=1)
1814 match = re.match(
1815 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1816 date_str)
1817 if match is not None:
1818 start_time = datetime_from_str(match.group('start'), precision, format)
1819 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1820 unit = match.group('unit')
1821 if unit == 'month' or unit == 'year':
1822 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1823 unit = 'day'
1824 else:
1825 if unit == 'week':
1826 unit = 'day'
1827 time *= 7
1828 delta = datetime.timedelta(**{unit + 's': time})
1829 new_date = start_time + delta
1830 if auto_precision:
1831 return datetime_round(new_date, unit)
1832 return new_date
1833
1834 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1835
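# Example (editor's illustrative sketch, not part of the upstream file;
# uses an absolute DATE so the result is deterministic):
#   >>> datetime_from_str('20220101-2weeks', precision='day')
#   datetime.datetime(2021, 12, 18, 0, 0)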
1836
1837 def date_from_str(date_str, format='%Y%m%d', strict=False):
1838 R"""
1839 Return a date object from a string using datetime_from_str
1840
1841 @param strict Restrict allowed patterns to "YYYYMMDD" and
1842 (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1843 """
1844 if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1845 raise ValueError(f'Invalid date format "{date_str}"')
1846 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1847
1848
1849 def datetime_add_months(dt, months):
1850 """Increment/Decrement a datetime object by months."""
1851 month = dt.month + months - 1
1852 year = dt.year + month // 12
1853 month = month % 12 + 1
1854 day = min(dt.day, calendar.monthrange(year, month)[1])
1855 return dt.replace(year, month, day)
1856
1857
1858 def datetime_round(dt, precision='day'):
1859 """
1860 Round a datetime object's time to a specific precision
1861 """
1862 if precision == 'microsecond':
1863 return dt
1864
1865 unit_seconds = {
1866 'day': 86400,
1867 'hour': 3600,
1868 'minute': 60,
1869 'second': 1,
1870 }
1871 roundto = lambda x, n: ((x + n / 2) // n) * n
1872 timestamp = calendar.timegm(dt.timetuple())
1873 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1874
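# Examples (editor's illustrative sketch, not part of the upstream file):
#   >>> datetime_add_months(datetime.datetime(2022, 1, 31), 1)
#   datetime.datetime(2022, 2, 28, 0, 0)
#   >>> datetime_round(datetime.datetime(2022, 1, 1, 13, 0), 'day')
#   datetime.datetime(2022, 1, 2, 0, 0)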
1875
1876 def hyphenate_date(date_str):
1877 """
1878 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1879 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1880 if match is not None:
1881 return '-'.join(match.groups())
1882 else:
1883 return date_str
1884
1885
1886 class DateRange:
1887 """Represents a time interval between two dates"""
1888
1889 def __init__(self, start=None, end=None):
1890 """start and end must be strings in the format accepted by date"""
1891 if start is not None:
1892 self.start = date_from_str(start, strict=True)
1893 else:
1894 self.start = datetime.datetime.min.date()
1895 if end is not None:
1896 self.end = date_from_str(end, strict=True)
1897 else:
1898 self.end = datetime.datetime.max.date()
1899 if self.start > self.end:
1900 raise ValueError('Date range: "%s": the start date must be before the end date' % self)
1901
1902 @classmethod
1903 def day(cls, day):
1904 """Returns a range that only contains the given day"""
1905 return cls(day, day)
1906
1907 def __contains__(self, date):
1908 """Check if the date is in the range"""
1909 if not isinstance(date, datetime.date):
1910 date = date_from_str(date)
1911 return self.start <= date <= self.end
1912
1913 def __str__(self):
1914 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1915
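# Example (editor's illustrative sketch, not part of the upstream file):
#   >>> '20220115' in DateRange('20220101', '20220131')
#   True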
1916
1917 def platform_name():
1918 """ Returns the platform name as a compat_str """
1919 res = platform.platform()
1920 if isinstance(res, bytes):
1921 res = res.decode(preferredencoding())
1922
1923 assert isinstance(res, compat_str)
1924 return res
1925
1926
1927 @functools.cache
1928 def get_windows_version():
1929 ''' Get Windows version. Returns () if not running on Windows '''
1930 if compat_os_name == 'nt':
1931 return version_tuple(platform.win32_ver()[1])
1932 else:
1933 return ()
1934
1935
1936 def write_string(s, out=None, encoding=None):
1937 assert isinstance(s, str)
1938 out = out or sys.stderr
1939
1940 if compat_os_name == 'nt' and supports_terminal_sequences(out):
1941 s = re.sub(r'([\r\n]+)', r' \1', s)
1942
1943 enc, buffer = None, out
1944 if 'b' in getattr(out, 'mode', ''):
1945 enc = encoding or preferredencoding()
1946 elif hasattr(out, 'buffer'):
1947 buffer = out.buffer
1948 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1949
1950 buffer.write(s.encode(enc, 'ignore') if enc else s)
1951 out.flush()
1952
1953
1954 def bytes_to_intlist(bs):
1955 if not bs:
1956 return []
1957 if isinstance(bs[0], int): # Python 3
1958 return list(bs)
1959 else:
1960 return [ord(c) for c in bs]
1961
1962
1963 def intlist_to_bytes(xs):
1964 if not xs:
1965 return b''
1966 return compat_struct_pack('%dB' % len(xs), *xs)
1967
1968
1969 class LockingUnsupportedError(OSError):
1970 msg = 'File locking is not supported'
1971
1972 def __init__(self):
1973 super().__init__(self.msg)
1974
1975
1976 # Cross-platform file locking
1977 if sys.platform == 'win32':
1978 import ctypes.wintypes
1979 import msvcrt
1980
1981 class OVERLAPPED(ctypes.Structure):
1982 _fields_ = [
1983 ('Internal', ctypes.wintypes.LPVOID),
1984 ('InternalHigh', ctypes.wintypes.LPVOID),
1985 ('Offset', ctypes.wintypes.DWORD),
1986 ('OffsetHigh', ctypes.wintypes.DWORD),
1987 ('hEvent', ctypes.wintypes.HANDLE),
1988 ]
1989
1990 kernel32 = ctypes.windll.kernel32
1991 LockFileEx = kernel32.LockFileEx
1992 LockFileEx.argtypes = [
1993 ctypes.wintypes.HANDLE, # hFile
1994 ctypes.wintypes.DWORD, # dwFlags
1995 ctypes.wintypes.DWORD, # dwReserved
1996 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1997 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1998 ctypes.POINTER(OVERLAPPED) # Overlapped
1999 ]
2000 LockFileEx.restype = ctypes.wintypes.BOOL
2001 UnlockFileEx = kernel32.UnlockFileEx
2002 UnlockFileEx.argtypes = [
2003 ctypes.wintypes.HANDLE, # hFile
2004 ctypes.wintypes.DWORD, # dwReserved
2005 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2006 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2007 ctypes.POINTER(OVERLAPPED) # Overlapped
2008 ]
2009 UnlockFileEx.restype = ctypes.wintypes.BOOL
2010 whole_low = 0xffffffff
2011 whole_high = 0x7fffffff
2012
2013 def _lock_file(f, exclusive, block):
2014 overlapped = OVERLAPPED()
2015 overlapped.Offset = 0
2016 overlapped.OffsetHigh = 0
2017 overlapped.hEvent = 0
2018 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2019
2020 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2021 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2022 0, whole_low, whole_high, f._lock_file_overlapped_p):
2023 # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy
2024 raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2025
2026 def _unlock_file(f):
2027 assert f._lock_file_overlapped_p
2028 handle = msvcrt.get_osfhandle(f.fileno())
2029 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2030 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2031
2032 else:
2033 try:
2034 import fcntl
2035
2036 def _lock_file(f, exclusive, block):
2037 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2038 if not block:
2039 flags |= fcntl.LOCK_NB
2040 try:
2041 fcntl.flock(f, flags)
2042 except BlockingIOError:
2043 raise
2044 except OSError: # AOSP does not have flock()
2045 fcntl.lockf(f, flags)
2046
2047 def _unlock_file(f):
2048 try:
2049 fcntl.flock(f, fcntl.LOCK_UN)
2050 except OSError:
2051 fcntl.lockf(f, fcntl.LOCK_UN)
2052
2053 except ImportError:
2054
2055 def _lock_file(f, exclusive, block):
2056 raise LockingUnsupportedError()
2057
2058 def _unlock_file(f):
2059 raise LockingUnsupportedError()
2060
2061
2062 class locked_file:
2063 locked = False
2064
2065 def __init__(self, filename, mode, block=True, encoding=None):
2066 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2067 raise NotImplementedError(mode)
2068 self.mode, self.block = mode, block
2069
2070 writable = any(f in mode for f in 'wax+')
2071 readable = any(f in mode for f in 'r+')
2072 flags = functools.reduce(operator.ior, (
2073 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2074 getattr(os, 'O_BINARY', 0), # Windows only
2075 getattr(os, 'O_NOINHERIT', 0), # Windows only
2076 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2077 os.O_APPEND if 'a' in mode else 0,
2078 os.O_EXCL if 'x' in mode else 0,
2079 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2080 ))
2081
2082 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2083
2084 def __enter__(self):
2085 exclusive = 'r' not in self.mode
2086 try:
2087 _lock_file(self.f, exclusive, self.block)
2088 self.locked = True
2089 except OSError:
2090 self.f.close()
2091 raise
2092 if 'w' in self.mode:
2093 try:
2094 self.f.truncate()
2095 except OSError as e:
2096 if e.errno not in (
2097 errno.ESPIPE, # Illegal seek - expected for FIFO
2098 errno.EINVAL, # Invalid argument - expected for /dev/null
2099 ):
2100 raise
2101 return self
2102
2103 def unlock(self):
2104 if not self.locked:
2105 return
2106 try:
2107 _unlock_file(self.f)
2108 finally:
2109 self.locked = False
2110
2111 def __exit__(self, *_):
2112 try:
2113 self.unlock()
2114 finally:
2115 self.f.close()
2116
2117 open = __enter__
2118 close = __exit__
2119
2120 def __getattr__(self, attr):
2121 return getattr(self.f, attr)
2122
2123 def __iter__(self):
2124 return iter(self.f)
2125
2126
2127 @functools.cache
2128 def get_filesystem_encoding():
2129 encoding = sys.getfilesystemencoding()
2130 return encoding if encoding is not None else 'utf-8'
2131
2132
2133 def shell_quote(args):
2134 quoted_args = []
2135 encoding = get_filesystem_encoding()
2136 for a in args:
2137 if isinstance(a, bytes):
2138 # We may get a filename encoded with 'encodeFilename'
2139 a = a.decode(encoding)
2140 quoted_args.append(compat_shlex_quote(a))
2141 return ' '.join(quoted_args)
2142
2143
2144 def smuggle_url(url, data):
2145 """ Pass additional data in a URL for internal use. """
2146
2147 url, idata = unsmuggle_url(url, {})
2148 data.update(idata)
2149 sdata = compat_urllib_parse_urlencode(
2150 {'__youtubedl_smuggle': json.dumps(data)})
2151 return url + '#' + sdata
2152
2153
2154 def unsmuggle_url(smug_url, default=None):
2155 if '#__youtubedl_smuggle' not in smug_url:
2156 return smug_url, default
2157 url, _, sdata = smug_url.rpartition('#')
2158 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2159 data = json.loads(jsond)
2160 return url, data
2161
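# Example round-trip (editor's illustrative sketch, not part of the
# upstream file; the URL and data are made-up values):
#   >>> unsmuggle_url(smuggle_url('https://example.com/v', {'referer': 'x'}))
#   ('https://example.com/v', {'referer': 'x'})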
2162
2163 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2164 """ Formats numbers with decimal sufixes like K, M, etc """
2165 num, factor = float_or_none(num), float(factor)
2166 if num is None or num < 0:
2167 return None
2168 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2169 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2170 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2171 if factor == 1024:
2172 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2173 converted = num / (factor ** exponent)
2174 return fmt % (converted, suffix)
2175
2176
2177 def format_bytes(bytes):
2178 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2179
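# Examples (editor's illustrative sketch, not part of the upstream file;
# factor=1024 switches to the binary "Ki"/"Mi" suffixes):
#   >>> format_decimal_suffix(1234, '%.1f%s')
#   '1.2k'
#   >>> format_bytes(1500)
#   '1.46KiB'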
2180
2181 def lookup_unit_table(unit_table, s):
2182 units_re = '|'.join(re.escape(u) for u in unit_table)
2183 m = re.match(
2184 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2185 if not m:
2186 return None
2187 num_str = m.group('num').replace(',', '.')
2188 mult = unit_table[m.group('unit')]
2189 return int(float(num_str) * mult)
2190
2191
2192 def parse_filesize(s):
2193 if s is None:
2194 return None
2195
2196 # The lower-case forms are of course incorrect and unofficial,
2197 # but we support those too
2198 _UNIT_TABLE = {
2199 'B': 1,
2200 'b': 1,
2201 'bytes': 1,
2202 'KiB': 1024,
2203 'KB': 1000,
2204 'kB': 1024,
2205 'Kb': 1000,
2206 'kb': 1000,
2207 'kilobytes': 1000,
2208 'kibibytes': 1024,
2209 'MiB': 1024 ** 2,
2210 'MB': 1000 ** 2,
2211 'mB': 1024 ** 2,
2212 'Mb': 1000 ** 2,
2213 'mb': 1000 ** 2,
2214 'megabytes': 1000 ** 2,
2215 'mebibytes': 1024 ** 2,
2216 'GiB': 1024 ** 3,
2217 'GB': 1000 ** 3,
2218 'gB': 1024 ** 3,
2219 'Gb': 1000 ** 3,
2220 'gb': 1000 ** 3,
2221 'gigabytes': 1000 ** 3,
2222 'gibibytes': 1024 ** 3,
2223 'TiB': 1024 ** 4,
2224 'TB': 1000 ** 4,
2225 'tB': 1024 ** 4,
2226 'Tb': 1000 ** 4,
2227 'tb': 1000 ** 4,
2228 'terabytes': 1000 ** 4,
2229 'tebibytes': 1024 ** 4,
2230 'PiB': 1024 ** 5,
2231 'PB': 1000 ** 5,
2232 'pB': 1024 ** 5,
2233 'Pb': 1000 ** 5,
2234 'pb': 1000 ** 5,
2235 'petabytes': 1000 ** 5,
2236 'pebibytes': 1024 ** 5,
2237 'EiB': 1024 ** 6,
2238 'EB': 1000 ** 6,
2239 'eB': 1024 ** 6,
2240 'Eb': 1000 ** 6,
2241 'eb': 1000 ** 6,
2242 'exabytes': 1000 ** 6,
2243 'exbibytes': 1024 ** 6,
2244 'ZiB': 1024 ** 7,
2245 'ZB': 1000 ** 7,
2246 'zB': 1024 ** 7,
2247 'Zb': 1000 ** 7,
2248 'zb': 1000 ** 7,
2249 'zettabytes': 1000 ** 7,
2250 'zebibytes': 1024 ** 7,
2251 'YiB': 1024 ** 8,
2252 'YB': 1000 ** 8,
2253 'yB': 1024 ** 8,
2254 'Yb': 1000 ** 8,
2255 'yb': 1000 ** 8,
2256 'yottabytes': 1000 ** 8,
2257 'yobibytes': 1024 ** 8,
2258 }
2259
2260 return lookup_unit_table(_UNIT_TABLE, s)
2261
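# Example (editor's illustrative sketch, not part of the upstream file;
# note the decimal GB vs. binary GiB distinction in the table above):
#   >>> parse_filesize('1.5GB')
#   1500000000
#   >>> parse_filesize('1.5GiB')
#   1610612736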
2262
2263 def parse_count(s):
2264 if s is None:
2265 return None
2266
2267 s = re.sub(r'^[^\d]+\s', '', s).strip()
2268
2269 if re.match(r'^[\d,.]+$', s):
2270 return str_to_int(s)
2271
2272 _UNIT_TABLE = {
2273 'k': 1000,
2274 'K': 1000,
2275 'm': 1000 ** 2,
2276 'M': 1000 ** 2,
2277 'kk': 1000 ** 2,
2278 'KK': 1000 ** 2,
2279 'b': 1000 ** 3,
2280 'B': 1000 ** 3,
2281 }
2282
2283 ret = lookup_unit_table(_UNIT_TABLE, s)
2284 if ret is not None:
2285 return ret
2286
2287 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2288 if mobj:
2289 return str_to_int(mobj.group(1))
2290
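# Example (editor's illustrative sketch, not part of the upstream file):
#   >>> parse_count('1,000 views')
#   1000
#   >>> parse_count('1.5M')
#   1500000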
2291
2292 def parse_resolution(s, *, lenient=False):
2293 if s is None:
2294 return {}
2295
2296 if lenient:
2297 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2298 else:
2299 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2300 if mobj:
2301 return {
2302 'width': int(mobj.group('w')),
2303 'height': int(mobj.group('h')),
2304 }
2305
2306 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2307 if mobj:
2308 return {'height': int(mobj.group(1))}
2309
2310 mobj = re.search(r'\b([48])[kK]\b', s)
2311 if mobj:
2312 return {'height': int(mobj.group(1)) * 540}
2313
2314 return {}
2315
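# Examples (editor's illustrative sketch, not part of the upstream file):
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('4K')
#   {'height': 2160}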
2316
2317 def parse_bitrate(s):
2318 if not isinstance(s, compat_str):
2319 return
2320 mobj = re.search(r'\b(\d+)\s*kbps', s)
2321 if mobj:
2322 return int(mobj.group(1))
2323
2324
2325 def month_by_name(name, lang='en'):
2326 """ Return the number of a month by (locale-independently) English name """
2327
2328 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2329
2330 try:
2331 return month_names.index(name) + 1
2332 except ValueError:
2333 return None
2334
2335
2336 def month_by_abbreviation(abbrev):
2337 """ Return the number of a month by (locale-independently) English
2338 abbreviations """
2339
2340 try:
2341 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2342 except ValueError:
2343 return None
2344
2345
2346 def fix_xml_ampersands(xml_str):
2347 """Replace all the '&' by '&amp;' in XML"""
2348 return re.sub(
2349 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2350 '&amp;',
2351 xml_str)
2352
2353
2354 def setproctitle(title):
2355 assert isinstance(title, compat_str)
2356
2357 # ctypes in Jython is not complete
2358 # http://bugs.jython.org/issue2148
2359 if sys.platform.startswith('java'):
2360 return
2361
2362 try:
2363 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2364 except OSError:
2365 return
2366 except TypeError:
2367 # LoadLibrary in Windows Python 2.7.13 only expects
2368 # a bytestring, but since unicode_literals turns
2369 # every string into a unicode string, it fails.
2370 return
2371 title_bytes = title.encode()
2372 buf = ctypes.create_string_buffer(len(title_bytes))
2373 buf.value = title_bytes
2374 try:
2375 libc.prctl(15, buf, 0, 0, 0)
2376 except AttributeError:
2377 return # Strange libc, just skip this
2378
2379
2380 def remove_start(s, start):
2381 return s[len(start):] if s is not None and s.startswith(start) else s
2382
2383
2384 def remove_end(s, end):
2385 return s[:-len(end)] if s is not None and s.endswith(end) else s
2386
2387
2388 def remove_quotes(s):
2389 if s is None or len(s) < 2:
2390 return s
2391 for quote in ('"', "'", ):
2392 if s[0] == quote and s[-1] == quote:
2393 return s[1:-1]
2394 return s
2395
2396
2397 def get_domain(url):
2398 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2399 return domain.group('domain') if domain else None
2400
2401
2402 def url_basename(url):
2403 path = compat_urlparse.urlparse(url).path
2404 return path.strip('/').split('/')[-1]
2405
2406
2407 def base_url(url):
2408 return re.match(r'https?://[^?#&]+/', url).group()
2409
2410
2411 def urljoin(base, path):
2412 if isinstance(path, bytes):
2413 path = path.decode()
2414 if not isinstance(path, compat_str) or not path:
2415 return None
2416 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2417 return path
2418 if isinstance(base, bytes):
2419 base = base.decode()
2420 if not isinstance(base, compat_str) or not re.match(
2421 r'^(?:https?:)?//', base):
2422 return None
2423 return compat_urlparse.urljoin(base, path)
2424
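# Examples (editor's illustrative sketch, not part of the upstream file;
# non-http(s) bases are rejected by design):
#   >>> urljoin('https://example.com/a/', 'b.mp4')
#   'https://example.com/a/b.mp4'
#   >>> urljoin('ftp://example.com/', 'b.mp4') is None
#   True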
2425
2426 class HEADRequest(compat_urllib_request.Request):
2427 def get_method(self):
2428 return 'HEAD'
2429
2430
2431 class PUTRequest(compat_urllib_request.Request):
2432 def get_method(self):
2433 return 'PUT'
2434
2435
2436 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2437 if get_attr and v is not None:
2438 v = getattr(v, get_attr, None)
2439 try:
2440 return int(v) * invscale // scale
2441 except (ValueError, TypeError, OverflowError):
2442 return default
2443
2444
2445 def str_or_none(v, default=None):
2446 return default if v is None else compat_str(v)
2447
2448
2449 def str_to_int(int_str):
2450 """ A more relaxed version of int_or_none """
2451 if isinstance(int_str, int):
2452 return int_str
2453 elif isinstance(int_str, compat_str):
2454 int_str = re.sub(r'[,\.\+]', '', int_str)
2455 return int_or_none(int_str)
2456
2457
2458 def float_or_none(v, scale=1, invscale=1, default=None):
2459 if v is None:
2460 return default
2461 try:
2462 return float(v) * invscale / scale
2463 except (ValueError, TypeError):
2464 return default
2465
2466
2467 def bool_or_none(v, default=None):
2468 return v if isinstance(v, bool) else default
2469
2470
2471 def strip_or_none(v, default=None):
2472 return v.strip() if isinstance(v, compat_str) else default
2473
2474
2475 def url_or_none(url):
2476 if not url or not isinstance(url, compat_str):
2477 return None
2478 url = url.strip()
2479 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2480
2481
2482 def request_to_url(req):
2483 if isinstance(req, compat_urllib_request.Request):
2484 return req.get_full_url()
2485 else:
2486 return req
2487
2488
2489 def strftime_or_none(timestamp, date_format, default=None):
2490 datetime_object = None
2491 try:
2492 if isinstance(timestamp, (int, float)): # unix timestamp
2493 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2494 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2495 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2496 return datetime_object.strftime(date_format)
2497 except (ValueError, TypeError, AttributeError):
2498 return default
2499
2500
2501 def parse_duration(s):
2502 if not isinstance(s, str):
2503 return None
2504 s = s.strip()
2505 if not s:
2506 return None
2507
2508 days, hours, mins, secs, ms = [None] * 5
2509 m = re.match(r'''(?x)
2510 (?P<before_secs>
2511 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2512 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2513 (?P<ms>[.:][0-9]+)?Z?$
2514 ''', s)
2515 if m:
2516 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2517 else:
2518 m = re.match(
2519 r'''(?ix)(?:P?
2520 (?:
2521 [0-9]+\s*y(?:ears?)?,?\s*
2522 )?
2523 (?:
2524 [0-9]+\s*m(?:onths?)?,?\s*
2525 )?
2526 (?:
2527 [0-9]+\s*w(?:eeks?)?,?\s*
2528 )?
2529 (?:
2530 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2531 )?
2532 T)?
2533 (?:
2534 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2535 )?
2536 (?:
2537 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2538 )?
2539 (?:
2540 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2541 )?Z?$''', s)
2542 if m:
2543 days, hours, mins, secs, ms = m.groups()
2544 else:
2545 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2546 if m:
2547 hours, mins = m.groups()
2548 else:
2549 return None
2550
2551 if ms:
2552 ms = ms.replace(':', '.')
2553 return sum(float(part or 0) * mult for part, mult in (
2554 (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2555
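# Examples (editor's illustrative sketch, not part of the upstream file;
# both colon-separated and ISO 8601-style durations are accepted):
#   >>> parse_duration('1:30')
#   90.0
#   >>> parse_duration('PT1H30M')
#   5400.0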
2556
2557 def prepend_extension(filename, ext, expected_real_ext=None):
2558 name, real_ext = os.path.splitext(filename)
2559 return (
2560 f'{name}.{ext}{real_ext}'
2561 if not expected_real_ext or real_ext[1:] == expected_real_ext
2562 else f'{filename}.{ext}')
2563
2564
2565 def replace_extension(filename, ext, expected_real_ext=None):
2566 name, real_ext = os.path.splitext(filename)
2567 return '{}.{}'.format(
2568 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2569 ext)
2570
2571
2572 def check_executable(exe, args=[]):
2573 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2574 args can be a list of arguments for a short output (like -version) """
2575 try:
2576 Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2577 except OSError:
2578 return False
2579 return exe
2580
2581
2582 def _get_exe_version_output(exe, args, *, to_screen=None):
2583 if to_screen:
2584 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2585 try:
2586 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2587 # SIGTTOU if yt-dlp is run in the background.
2588 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2589 stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2590 stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2591 except OSError:
2592 return False
2593 return stdout
2594
2595
2596 def detect_exe_version(output, version_re=None, unrecognized='present'):
2597 assert isinstance(output, compat_str)
2598 if version_re is None:
2599 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2600 m = re.search(version_re, output)
2601 if m:
2602 return m.group(1)
2603 else:
2604 return unrecognized
2605
2606
2607 def get_exe_version(exe, args=['--version'],
2608 version_re=None, unrecognized='present'):
2609 """ Returns the version of the specified executable,
2610 or False if the executable is not present """
2611 out = _get_exe_version_output(exe, args)
2612 return detect_exe_version(out, version_re, unrecognized) if out else False
2613
2614
2615 def frange(start=0, stop=None, step=1):
2616 """Float range"""
2617 if stop is None:
2618 start, stop = 0, start
2619 sign = [-1, 1][step > 0] if step else 0
2620 while sign * start < sign * stop:
2621 yield start
2622 start += step
2623
2624
2625 class LazyList(collections.abc.Sequence):
2626 """Lazy immutable list from an iterable
2627 Note that slices of a LazyList are lists and not LazyList"""
2628
2629 class IndexError(IndexError):
2630 pass
2631
2632 def __init__(self, iterable, *, reverse=False, _cache=None):
2633 self._iterable = iter(iterable)
2634 self._cache = [] if _cache is None else _cache
2635 self._reversed = reverse
2636
2637 def __iter__(self):
2638 if self._reversed:
2639 # We need to consume the entire iterable to iterate in reverse
2640 yield from self.exhaust()
2641 return
2642 yield from self._cache
2643 for item in self._iterable:
2644 self._cache.append(item)
2645 yield item
2646
2647 def _exhaust(self):
2648 self._cache.extend(self._iterable)
2649 self._iterable = [] # Discard the emptied iterable to make it pickle-able
2650 return self._cache
2651
2652 def exhaust(self):
2653 """Evaluate the entire iterable"""
2654 return self._exhaust()[::-1 if self._reversed else 1]
2655
2656 @staticmethod
2657 def _reverse_index(x):
2658 return None if x is None else -(x + 1)
2659
2660 def __getitem__(self, idx):
2661 if isinstance(idx, slice):
2662 if self._reversed:
2663 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2664 start, stop, step = idx.start, idx.stop, idx.step or 1
2665 elif isinstance(idx, int):
2666 if self._reversed:
2667 idx = self._reverse_index(idx)
2668 start, stop, step = idx, idx, 0
2669 else:
2670 raise TypeError('indices must be integers or slices')
2671 if ((start or 0) < 0 or (stop or 0) < 0
2672 or (start is None and step < 0)
2673 or (stop is None and step > 0)):
2674 # We need to consume the entire iterable to be able to slice from the end
2675 # Obviously, never use this with infinite iterables
2676 self._exhaust()
2677 try:
2678 return self._cache[idx]
2679 except IndexError as e:
2680 raise self.IndexError(e) from e
2681 n = max(start or 0, stop or 0) - len(self._cache) + 1
2682 if n > 0:
2683 self._cache.extend(itertools.islice(self._iterable, n))
2684 try:
2685 return self._cache[idx]
2686 except IndexError as e:
2687 raise self.IndexError(e) from e
2688
2689 def __bool__(self):
2690 try:
2691 self[-1] if self._reversed else self[0]
2692 except self.IndexError:
2693 return False
2694 return True
2695
2696 def __len__(self):
2697 self._exhaust()
2698 return len(self._cache)
2699
2700 def __reversed__(self):
2701 return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2702
2703 def __copy__(self):
2704 return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2705
2706 def __repr__(self):
2707 # repr and str should mimic a list. So we exhaust the iterable
2708 return repr(self.exhaust())
2709
2710 def __str__(self):
2711 return repr(self.exhaust())
2712
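# Example (editor's illustrative sketch, not part of the upstream file;
# slicing only consumes as much of the iterable as needed):
#   >>> lst = LazyList(itertools.count())
#   >>> lst[:3]
#   [0, 1, 2]
#   >>> lst[10]
#   10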
2713
2714 class PagedList:
2715
2716 class IndexError(IndexError):
2717 pass
2718
2719 def __len__(self):
2720 # This is only useful for tests
2721 return len(self.getslice())
2722
2723 def __init__(self, pagefunc, pagesize, use_cache=True):
2724 self._pagefunc = pagefunc
2725 self._pagesize = pagesize
2726 self._pagecount = float('inf')
2727 self._use_cache = use_cache
2728 self._cache = {}
2729
2730 def getpage(self, pagenum):
2731 page_results = self._cache.get(pagenum)
2732 if page_results is None:
2733 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2734 if self._use_cache:
2735 self._cache[pagenum] = page_results
2736 return page_results
2737
2738 def getslice(self, start=0, end=None):
2739 return list(self._getslice(start, end))
2740
2741 def _getslice(self, start, end):
2742 raise NotImplementedError('This method must be implemented by subclasses')
2743
2744 def __getitem__(self, idx):
2745 assert self._use_cache, 'Indexing PagedList requires cache'
2746 if not isinstance(idx, int) or idx < 0:
2747 raise TypeError('indices must be non-negative integers')
2748 entries = self.getslice(idx, idx + 1)
2749 if not entries:
2750 raise self.IndexError()
2751 return entries[0]
2752
2753
2754 class OnDemandPagedList(PagedList):
2755 """Download pages until a page with less than maximum results"""
2756
2757 def _getslice(self, start, end):
2758 for pagenum in itertools.count(start // self._pagesize):
2759 firstid = pagenum * self._pagesize
2760 nextfirstid = pagenum * self._pagesize + self._pagesize
2761 if start >= nextfirstid:
2762 continue
2763
2764 startv = (
2765 start % self._pagesize
2766 if firstid <= start < nextfirstid
2767 else 0)
2768 endv = (
2769 ((end - 1) % self._pagesize) + 1
2770 if (end is not None and firstid <= end <= nextfirstid)
2771 else None)
2772
2773 try:
2774 page_results = self.getpage(pagenum)
2775 except Exception:
2776 self._pagecount = pagenum - 1
2777 raise
2778 if startv != 0 or endv is not None:
2779 page_results = page_results[startv:endv]
2780 yield from page_results
2781
2782 # A little optimization - if the current page is not "full", i.e. does
2783 # not contain page_size videos, then we can assume that this page
2784 # is the last one - there are no more ids on further pages,
2785 # so there is no need to query again.
2786 if len(page_results) + startv < self._pagesize:
2787 break
2788
2789 # If we got the whole page, but the next page is not interesting,
2790 # break out early as well
2791 if end == nextfirstid:
2792 break
2793
2794
2795 class InAdvancePagedList(PagedList):
2796 """PagedList with total number of pages known in advance"""
2797
2798 def __init__(self, pagefunc, pagecount, pagesize):
2799 PagedList.__init__(self, pagefunc, pagesize, True)
2800 self._pagecount = pagecount
2801
2802 def _getslice(self, start, end):
2803 start_page = start // self._pagesize
2804 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2805 skip_elems = start - start_page * self._pagesize
2806 only_more = None if end is None else end - start
2807 for pagenum in range(start_page, end_page):
2808 page_results = self.getpage(pagenum)
2809 if skip_elems:
2810 page_results = page_results[skip_elems:]
2811 skip_elems = None
2812 if only_more is not None:
2813 if len(page_results) < only_more:
2814 only_more -= len(page_results)
2815 else:
2816 yield from page_results[:only_more]
2817 break
2818 yield from page_results
2819
2820
2821 class PlaylistEntries:
2822 MissingEntry = object()
2823 is_exhausted = False
2824
2825 def __init__(self, ydl, info_dict):
2826 self.ydl = ydl
2827
2828 # _entries must be assigned now since infodict can change during iteration
2829 entries = info_dict.get('entries')
2830 if entries is None:
2831 raise EntryNotInPlaylist('There are no entries')
2832 elif isinstance(entries, list):
2833 self.is_exhausted = True
2834
2835 requested_entries = info_dict.get('requested_entries')
2836 self.is_incomplete = bool(requested_entries)
2837 if self.is_incomplete:
2838 assert self.is_exhausted
2839 self._entries = [self.MissingEntry] * max(requested_entries)
2840 for i, entry in zip(requested_entries, entries):
2841 self._entries[i - 1] = entry
2842 elif isinstance(entries, (list, PagedList, LazyList)):
2843 self._entries = entries
2844 else:
2845 self._entries = LazyList(entries)
2846
2847 PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2848 (?P<start>[+-]?\d+)?
2849 (?P<range>[:-]
2850 (?P<end>[+-]?\d+|inf(?:inite)?)?
2851 (?::(?P<step>[+-]?\d+))?
2852 )?''')
2853
2854 @classmethod
2855 def parse_playlist_items(cls, string):
2856 for segment in string.split(','):
2857 if not segment:
2858 raise ValueError('There are two or more consecutive commas')
2859 mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2860 if not mobj:
2861 raise ValueError(f'{segment!r} is not a valid specification')
2862 start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2863 if int_or_none(step) == 0:
2864 raise ValueError(f'Step in {segment!r} cannot be zero')
2865 yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2866
2867 def get_requested_items(self):
2868 playlist_items = self.ydl.params.get('playlist_items')
2869 playlist_start = self.ydl.params.get('playliststart', 1)
2870 playlist_end = self.ydl.params.get('playlistend')
2871 # For backwards compatibility, interpret -1 as whole list
2872 if playlist_end in (-1, None):
2873 playlist_end = ''
2874 if not playlist_items:
2875 playlist_items = f'{playlist_start}:{playlist_end}'
2876 elif playlist_start != 1 or playlist_end:
2877 self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2878
2879 for index in self.parse_playlist_items(playlist_items):
2880 for i, entry in self[index]:
2881 yield i, entry
2882 try:
2883 # TODO: Add auto-generated fields
2884 self.ydl._match_entry(entry, incomplete=True, silent=True)
2885 except (ExistingVideoReached, RejectedVideoReached):
2886 return
2887
2888 def get_full_count(self):
2889 if self.is_exhausted and not self.is_incomplete:
2890 return len(self)
2891 elif isinstance(self._entries, InAdvancePagedList):
2892 if self._entries._pagesize == 1:
2893 return self._entries._pagecount
2894
2895 @functools.cached_property
2896 def _getter(self):
2897 if isinstance(self._entries, list):
2898 def get_entry(i):
2899 try:
2900 entry = self._entries[i]
2901 except IndexError:
2902 entry = self.MissingEntry
2903 if not self.is_incomplete:
2904 raise self.IndexError()
2905 if entry is self.MissingEntry:
2906 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2907 return entry
2908 else:
2909 def get_entry(i):
2910 try:
2911 return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
2912 except (LazyList.IndexError, PagedList.IndexError):
2913 raise self.IndexError()
2914 return get_entry
2915
2916 def __getitem__(self, idx):
2917 if isinstance(idx, int):
2918 idx = slice(idx, idx)
2919
2920 # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
2921 step = 1 if idx.step is None else idx.step
2922 if idx.start is None:
2923 start = 0 if step > 0 else len(self) - 1
2924 else:
2925 start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
2926
2927 # NB: Do not call len(self) when idx == [:]
2928 if idx.stop is None:
2929 stop = 0 if step < 0 else float('inf')
2930 else:
2931 stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
2932 stop += [-1, 1][step > 0]
2933
2934 for i in frange(start, stop, step):
2935 if i < 0:
2936 continue
2937 try:
2938 entry = self._getter(i)
2939 except self.IndexError:
2940 self.is_exhausted = True
2941 if step > 0:
2942 break
2943 continue
2944 yield i + 1, entry
2945
2946 def __len__(self):
2947 return len(tuple(self[:]))
2948
2949 class IndexError(IndexError):
2950 pass
2951
2952
2953 def uppercase_escape(s):
2954 unicode_escape = codecs.getdecoder('unicode_escape')
2955 return re.sub(
2956 r'\\U[0-9a-fA-F]{8}',
2957 lambda m: unicode_escape(m.group(0))[0],
2958 s)
2959
2960
2961 def lowercase_escape(s):
2962 unicode_escape = codecs.getdecoder('unicode_escape')
2963 return re.sub(
2964 r'\\u[0-9a-fA-F]{4}',
2965 lambda m: unicode_escape(m.group(0))[0],
2966 s)
2967
2968
2969 def escape_rfc3986(s):
2970 """Escape non-ASCII characters as suggested by RFC 3986"""
2971 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2972
2973
2974 def escape_url(url):
2975 """Escape URL as suggested by RFC 3986"""
2976 url_parsed = compat_urllib_parse_urlparse(url)
2977 return url_parsed._replace(
2978 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2979 path=escape_rfc3986(url_parsed.path),
2980 params=escape_rfc3986(url_parsed.params),
2981 query=escape_rfc3986(url_parsed.query),
2982 fragment=escape_rfc3986(url_parsed.fragment)
2983 ).geturl()
2984
2985
2986 def parse_qs(url):
2987 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2988
2989
2990 def read_batch_urls(batch_fd):
2991 def fixup(url):
2992 if not isinstance(url, compat_str):
2993 url = url.decode('utf-8', 'replace')
2994 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2995 for bom in BOM_UTF8:
2996 if url.startswith(bom):
2997 url = url[len(bom):]
2998 url = url.lstrip()
2999 if not url or url.startswith(('#', ';', ']')):
3000 return False
3001 # "#" cannot be stripped out since it is part of the URI
3002 # However, it can be safely stripped out if it follows a whitespace character
3003 return re.split(r'\s#', url, 1)[0].rstrip()
3004
3005 with contextlib.closing(batch_fd) as fd:
3006 return [url for url in map(fixup, fd) if url]
3007
3008
3009 def urlencode_postdata(*args, **kargs):
3010 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3011
3012
3013 def update_url_query(url, query):
3014 if not query:
3015 return url
3016 parsed_url = compat_urlparse.urlparse(url)
3017 qs = compat_parse_qs(parsed_url.query)
3018 qs.update(query)
3019 return compat_urlparse.urlunparse(parsed_url._replace(
3020 query=compat_urllib_parse_urlencode(qs, True)))
3021
3022
3023 def update_Request(req, url=None, data=None, headers={}, query={}):
3024 req_headers = req.headers.copy()
3025 req_headers.update(headers)
3026 req_data = data or req.data
3027 req_url = update_url_query(url or req.get_full_url(), query)
3028 req_get_method = req.get_method()
3029 if req_get_method == 'HEAD':
3030 req_type = HEADRequest
3031 elif req_get_method == 'PUT':
3032 req_type = PUTRequest
3033 else:
3034 req_type = compat_urllib_request.Request
3035 new_req = req_type(
3036 req_url, data=req_data, headers=req_headers,
3037 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3038 if hasattr(req, 'timeout'):
3039 new_req.timeout = req.timeout
3040 return new_req
3041
3042
3043 def _multipart_encode_impl(data, boundary):
3044 content_type = 'multipart/form-data; boundary=%s' % boundary
3045
3046 out = b''
3047 for k, v in data.items():
3048 out += b'--' + boundary.encode('ascii') + b'\r\n'
3049 if isinstance(k, compat_str):
3050 k = k.encode()
3051 if isinstance(v, compat_str):
3052 v = v.encode()
3053 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3054 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3055 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3056 if boundary.encode('ascii') in content:
3057 raise ValueError('Boundary overlaps with data')
3058 out += content
3059
3060 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3061
3062 return out, content_type
3063
3064
3065 def multipart_encode(data, boundary=None):
3066 '''
3067 Encode a dict to RFC 7578-compliant form-data
3068
3069 data:
3070 A dict where keys and values can be either Unicode or bytes-like
3071 objects.
3072 boundary:
3073 If specified, it must be a Unicode object and is used as the boundary.
3074 Otherwise, a random boundary is generated.
3075
3076 Reference: https://tools.ietf.org/html/rfc7578
3077 '''
3078 has_specified_boundary = boundary is not None
3079
3080 while True:
3081 if boundary is None:
3082 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3083
3084 try:
3085 out, content_type = _multipart_encode_impl(data, boundary)
3086 break
3087 except ValueError:
3088 if has_specified_boundary:
3089 raise
3090 boundary = None
3091
3092 return out, content_type
3093
3094
3095 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3096 for val in map(d.get, variadic(key_or_keys)):
3097 if val is not None and (val or not skip_false_values):
3098 return val
3099 return default
3100
3101
3102 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3103 for f in funcs:
3104 try:
3105 val = f(*args, **kwargs)
3106 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
3107 pass
3108 else:
3109 if expected_type is None or isinstance(val, expected_type):
3110 return val
3111
3112
3113 def try_get(src, getter, expected_type=None):
3114 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3115
3116
3117 def filter_dict(dct, cndn=lambda _, v: v is not None):
3118 return {k: v for k, v in dct.items() if cndn(k, v)}
3119
3120
3121 def merge_dicts(*dicts):
3122 merged = {}
3123 for a_dict in dicts:
3124 for k, v in a_dict.items():
3125 if (v is not None and k not in merged
3126 or isinstance(v, str) and merged[k] == ''):
3127 merged[k] = v
3128 return merged
3129
3130
3131 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3132 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
3133
3134
3135 US_RATINGS = {
3136 'G': 0,
3137 'PG': 10,
3138 'PG-13': 13,
3139 'R': 16,
3140 'NC': 18,
3141 }
3142
3143
3144 TV_PARENTAL_GUIDELINES = {
3145 'TV-Y': 0,
3146 'TV-Y7': 7,
3147 'TV-G': 0,
3148 'TV-PG': 0,
3149 'TV-14': 14,
3150 'TV-MA': 17,
3151 }
3152
3153
3154 def parse_age_limit(s):
3155 # isinstance(False, int) is True. So type() must be used instead
3156 if type(s) is int: # noqa: E721
3157 return s if 0 <= s <= 21 else None
3158 elif not isinstance(s, str):
3159 return None
3160 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3161 if m:
3162 return int(m.group('age'))
3163 s = s.upper()
3164 if s in US_RATINGS:
3165 return US_RATINGS[s]
3166 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3167 if m:
3168 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3169 return None
3170
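# Examples (editor's illustrative sketch, not part of the upstream file):
#   >>> parse_age_limit('TV-MA')
#   17
#   >>> parse_age_limit('18+')
#   18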
3171
3172 def strip_jsonp(code):
3173 return re.sub(
3174 r'''(?sx)^
3175 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3176 (?:\s*&&\s*(?P=func_name))?
3177 \s*\(\s*(?P<callback_data>.*)\);?
3178 \s*?(?://[^\n]*)*$''',
3179 r'\g<callback_data>', code)
3180
3181
3182 def js_to_json(code, vars={}):
3183 # vars is a dict of var, val pairs to substitute
3184 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3185 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3186 INTEGER_TABLE = (
3187 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3188 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3189 )
3190
3191 def fix_kv(m):
3192 v = m.group(0)
3193 if v in ('true', 'false', 'null'):
3194 return v
3195 elif v in ('undefined', 'void 0'):
3196 return 'null'
3197 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3198 return ""
3199
3200 if v[0] in ("'", '"'):
3201 v = re.sub(r'(?s)\\.|"', lambda m: {
3202 '"': '\\"',
3203 "\\'": "'",
3204 '\\\n': '',
3205 '\\x': '\\u00',
3206 }.get(m.group(0), m.group(0)), v[1:-1])
3207 else:
3208 for regex, base in INTEGER_TABLE:
3209 im = re.match(regex, v)
3210 if im:
3211 i = int(im.group(1), base)
3212 return '"%d":' % i if v.endswith(':') else '%d' % i
3213
3214 if v in vars:
3215 return vars[v]
3216
3217 return '"%s"' % v
3218
3219 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3220
3221 return re.sub(r'''(?sx)
3222 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3223 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3224 {comment}|,(?={skip}[\]}}])|
3225 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3226 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3227 [0-9]+(?={skip}:)|
3228 !+
3229 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3230
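# Example (editor's illustrative sketch, not part of the upstream file;
# unquoted keys are quoted and `undefined` becomes `null`):
#   >>> js_to_json("{'x': 1, y: true, z: undefined}")
#   '{"x": 1, "y": true, "z": null}'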
3231
3232 def qualities(quality_ids):
3233 """ Get a numeric quality value out of a list of possible values """
3234 def q(qid):
3235 try:
3236 return quality_ids.index(qid)
3237 except ValueError:
3238 return -1
3239 return q
3240
3241
3242 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
3243
3244
3245 DEFAULT_OUTTMPL = {
3246 'default': '%(title)s [%(id)s].%(ext)s',
3247 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3248 }
3249 OUTTMPL_TYPES = {
3250 'chapter': None,
3251 'subtitle': None,
3252 'thumbnail': None,
3253 'description': 'description',
3254 'annotation': 'annotations.xml',
3255 'infojson': 'info.json',
3256 'link': None,
3257 'pl_video': None,
3258 'pl_thumbnail': None,
3259 'pl_description': 'description',
3260 'pl_infojson': 'info.json',
3261 }
3262
3263 # As of [1], the format syntax is:
3264 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3265 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3266 STR_FORMAT_RE_TMPL = r'''(?x)
3267 (?<!%)(?P<prefix>(?:%%)*)
3268 %
3269 (?P<has_key>\((?P<key>{0})\))?
3270 (?P<format>
3271 (?P<conversion>[#0\-+ ]+)?
3272 (?P<min_width>\d+)?
3273 (?P<precision>\.\d+)?
3274 (?P<len_mod>[hlL])? # unused in python
3275 {1} # conversion type
3276 )
3277 '''
3278
3279
3280 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3281
3282
3283 def limit_length(s, length):
3284 """ Add ellipses to overly long strings """
3285 if s is None:
3286 return None
3287 ELLIPSES = '...'
3288 if len(s) > length:
3289 return s[:length - len(ELLIPSES)] + ELLIPSES
3290 return s
3291
3292
3293 def version_tuple(v):
3294 return tuple(int(e) for e in re.split(r'[-.]', v))
3295
3296
3297 def is_outdated_version(version, limit, assume_new=True):
3298 if not version:
3299 return not assume_new
3300 try:
3301 return version_tuple(version) < version_tuple(limit)
3302 except ValueError:
3303 return not assume_new
3304
3305
3306 def ytdl_is_updateable():
3307 """ Returns if yt-dlp can be updated with -U """
3308
3309 from .update import is_non_updateable
3310
3311 return not is_non_updateable()
3312
3313
3314 def args_to_str(args):
3315 # Get a short string representation for a subprocess command
3316 return ' '.join(compat_shlex_quote(a) for a in args)
3317
3318
3319 def error_to_compat_str(err):
3320 return str(err)
3321
3322
3323 def error_to_str(err):
3324 return f'{type(err).__name__}: {err}'
3325
3326
3327 def mimetype2ext(mt):
3328 if mt is None:
3329 return None
3330
3331 mt, _, params = mt.partition(';')
3332 mt = mt.strip()
3333
3334 FULL_MAP = {
3335 'audio/mp4': 'm4a',
3336 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
3337 # it's the most popular one
3338 'audio/mpeg': 'mp3',
3339 'audio/x-wav': 'wav',
3340 'audio/wav': 'wav',
3341 'audio/wave': 'wav',
3342 }
3343
3344 ext = FULL_MAP.get(mt)
3345 if ext is not None:
3346 return ext
3347
3348 SUBTYPE_MAP = {
3349 '3gpp': '3gp',
3350 'smptett+xml': 'tt',
3351 'ttaf+xml': 'dfxp',
3352 'ttml+xml': 'ttml',
3353 'x-flv': 'flv',
3354 'x-mp4-fragmented': 'mp4',
3355 'x-ms-sami': 'sami',
3356 'x-ms-wmv': 'wmv',
3357 'mpegurl': 'm3u8',
3358 'x-mpegurl': 'm3u8',
3359 'vnd.apple.mpegurl': 'm3u8',
3360 'dash+xml': 'mpd',
3361 'f4m+xml': 'f4m',
3362 'hds+xml': 'f4m',
3363 'vnd.ms-sstr+xml': 'ism',
3364 'quicktime': 'mov',
3365 'mp2t': 'ts',
3366 'x-wav': 'wav',
3367 'filmstrip+json': 'fs',
3368 'svg+xml': 'svg',
3369 }
3370
3371 _, _, subtype = mt.rpartition('/')
3372 ext = SUBTYPE_MAP.get(subtype.lower())
3373 if ext is not None:
3374 return ext
3375
3376 SUFFIX_MAP = {
3377 'json': 'json',
3378 'xml': 'xml',
3379 'zip': 'zip',
3380 'gzip': 'gz',
3381 }
3382
3383 _, _, suffix = subtype.partition('+')
3384 ext = SUFFIX_MAP.get(suffix)
3385 if ext is not None:
3386 return ext
3387
3388 return subtype.replace('+', '.')
3389
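# Examples (editor's illustrative sketch, not part of the upstream file;
# MIME parameters after ';' are ignored):
#   >>> mimetype2ext('application/x-mpegurl')
#   'm3u8'
#   >>> mimetype2ext('video/mp4; codecs="avc1.42E01E"')
#   'mp4'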
3390
3391 def ext2mimetype(ext_or_url):
3392 if not ext_or_url:
3393 return None
3394 if '.' not in ext_or_url:
3395 ext_or_url = f'file.{ext_or_url}'
3396 return mimetypes.guess_type(ext_or_url)[0]
3397
3398
3399 def parse_codecs(codecs_str):
3400 # http://tools.ietf.org/html/rfc6381
3401 if not codecs_str:
3402 return {}
3403 split_codecs = list(filter(None, map(
3404 str.strip, codecs_str.strip().strip(',').split(','))))
3405 vcodec, acodec, scodec, hdr = None, None, None, None
3406 for full_codec in split_codecs:
3407 parts = full_codec.split('.')
3408 codec = parts[0].replace('0', '')
3409 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3410 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3411 if not vcodec:
3412 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3413 if codec in ('dvh1', 'dvhe'):
3414 hdr = 'DV'
3415 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3416 hdr = 'HDR10'
3417 elif full_codec.replace('0', '').startswith('vp9.2'):
3418 hdr = 'HDR10'
3419 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3420 if not acodec:
3421 acodec = full_codec
3422 elif codec in ('stpp', 'wvtt',):
3423 if not scodec:
3424 scodec = full_codec
3425 else:
3426 write_string(f'WARNING: Unknown codec {full_codec}\n')
3427 if vcodec or acodec or scodec:
3428 return {
3429 'vcodec': vcodec or 'none',
3430 'acodec': acodec or 'none',
3431 'dynamic_range': hdr,
3432 **({'scodec': scodec} if scodec is not None else {}),
3433 }
3434 elif len(split_codecs) == 2:
3435 return {
3436 'vcodec': split_codecs[0],
3437 'acodec': split_codecs[1],
3438 }
3439 return {}
3440
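# Example (editor's illustrative sketch, not part of the upstream file;
# codec strings here follow RFC 6381):
#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}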
3441
3442 def urlhandle_detect_ext(url_handle):
3443 getheader = url_handle.headers.get
3444
3445 cd = getheader('Content-Disposition')
3446 if cd:
3447 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3448 if m:
3449 e = determine_ext(m.group('filename'), default_ext=None)
3450 if e:
3451 return e
3452
3453 return mimetype2ext(getheader('Content-Type'))
3454
3455
3456 def encode_data_uri(data, mime_type):
3457 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3458
3459
3460 def age_restricted(content_limit, age_limit):
3461 """ Returns True iff the content should be blocked """
3462
3463 if age_limit is None: # No limit set
3464 return False
3465 if content_limit is None:
3466 return False # Content available for everyone
3467 return age_limit < content_limit
3468
3469
3470 def is_html(first_bytes):
3471 """ Detect whether a file contains HTML by examining its first bytes. """
3472
3473 BOMS = [
3474 (b'\xef\xbb\xbf', 'utf-8'),
3475 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3476 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3477 (b'\xff\xfe', 'utf-16-le'),
3478 (b'\xfe\xff', 'utf-16-be'),
3479 ]
3480
3481 encoding = 'utf-8'
3482 for bom, enc in BOMS:
3483 while first_bytes.startswith(bom):
3484 encoding, first_bytes = enc, first_bytes[len(bom):]
3485
3486 return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3487
3488
3489 def determine_protocol(info_dict):
3490 protocol = info_dict.get('protocol')
3491 if protocol is not None:
3492 return protocol
3493
3494 url = sanitize_url(info_dict['url'])
3495 if url.startswith('rtmp'):
3496 return 'rtmp'
3497 elif url.startswith('mms'):
3498 return 'mms'
3499 elif url.startswith('rtsp'):
3500 return 'rtsp'
3501
3502 ext = determine_ext(url)
3503 if ext == 'm3u8':
3504 return 'm3u8'
3505 elif ext == 'f4m':
3506 return 'f4m'
3507
3508 return compat_urllib_parse_urlparse(url).scheme
3509
3510
3511 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3512 """ Render a list of rows, each as a list of values.
3513 Text after a \t will be right aligned """
3514 def width(string):
3515 return len(remove_terminal_sequences(string).replace('\t', ''))
3516
3517 def get_max_lens(table):
3518 return [max(width(str(v)) for v in col) for col in zip(*table)]
3519
3520 def filter_using_list(row, filterArray):
3521 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3522
3523 max_lens = get_max_lens(data) if hide_empty else []
3524 header_row = filter_using_list(header_row, max_lens)
3525 data = [filter_using_list(row, max_lens) for row in data]
3526
3527 table = [header_row] + data
3528 max_lens = get_max_lens(table)
3529 extra_gap += 1
3530 if delim:
3531 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3532 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3533 for row in table:
3534 for pos, text in enumerate(map(str, row)):
3535 if '\t' in text:
3536 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3537 else:
3538 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3539 ret = '\n'.join(''.join(row).rstrip() for row in table)
3540 return ret
3541
3542
3543 def _match_one(filter_part, dct, incomplete):
3544 # TODO: Generalize code with YoutubeDL._build_format_filter
3545 STRING_OPERATORS = {
3546 '*=': operator.contains,
3547 '^=': lambda attr, value: attr.startswith(value),
3548 '$=': lambda attr, value: attr.endswith(value),
3549 '~=': lambda attr, value: re.search(value, attr),
3550 }
3551 COMPARISON_OPERATORS = {
3552 **STRING_OPERATORS,
3553 '<=': operator.le, # "<=" must be defined above "<"
3554 '<': operator.lt,
3555 '>=': operator.ge,
3556 '>': operator.gt,
3557 '=': operator.eq,
3558 }
3559
3560 if isinstance(incomplete, bool):
3561 is_incomplete = lambda _: incomplete
3562 else:
3563 is_incomplete = lambda k: k in incomplete
3564
3565 operator_rex = re.compile(r'''(?x)
3566 (?P<key>[a-z_]+)
3567 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3568 (?:
3569 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3570 (?P<strval>.+?)
3571 )
3572 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3573 m = operator_rex.fullmatch(filter_part.strip())
3574 if m:
3575 m = m.groupdict()
3576 unnegated_op = COMPARISON_OPERATORS[m['op']]
3577 if m['negation']:
3578 op = lambda attr, value: not unnegated_op(attr, value)
3579 else:
3580 op = unnegated_op
3581 comparison_value = m['quotedstrval'] or m['strval']
3582 if m['quote']:
3583 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3584 actual_value = dct.get(m['key'])
3585 numeric_comparison = None
3586 if isinstance(actual_value, (int, float)):
# If the original field is a string and the matching comparison value is
# a number, we should respect the origin of the original field
# and process the comparison value as a string (see
# https://github.com/ytdl-org/youtube-dl/issues/11082)
3591 try:
3592 numeric_comparison = int(comparison_value)
3593 except ValueError:
3594 numeric_comparison = parse_filesize(comparison_value)
3595 if numeric_comparison is None:
3596 numeric_comparison = parse_filesize(f'{comparison_value}B')
3597 if numeric_comparison is None:
3598 numeric_comparison = parse_duration(comparison_value)
3599 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3600 raise ValueError('Operator %s only supports string values!' % m['op'])
3601 if actual_value is None:
3602 return is_incomplete(m['key']) or m['none_inclusive']
3603 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3604
3605 UNARY_OPERATORS = {
3606 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3607 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3608 }
3609 operator_rex = re.compile(r'''(?x)
3610 (?P<op>%s)\s*(?P<key>[a-z_]+)
3611 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3612 m = operator_rex.fullmatch(filter_part.strip())
3613 if m:
3614 op = UNARY_OPERATORS[m.group('op')]
3615 actual_value = dct.get(m.group('key'))
3616 if is_incomplete(m.group('key')) and actual_value is None:
3617 return True
3618 return op(actual_value)
3619
3620 raise ValueError('Invalid filter part %r' % filter_part)
3621
3622
3623 def match_str(filter_str, dct, incomplete=False):
3624 """ Filter a dictionary with a simple string syntax.
3625 @returns Whether the filter passes
@param incomplete Set of keys that are expected to be missing from dct.
Can be True/False to indicate that all/none of the keys may be missing.
All conditions on incomplete keys pass if the key is missing.
3629 """
3630 return all(
3631 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3632 for filter_part in re.split(r'(?<!\\)&', filter_str))
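
# Examples (editor's note) of the filter syntax accepted by match_str/_match_one:
#
#   match_str('duration > 600 & title *= foo', {'duration': 700, 'title': 'foobar'})  # -> True
#   match_str('like_count >? 100', {'duration': 30})  # -> True ('?' lets a missing key pass)
#   match_str('!is_live', {'is_live': False})         # -> True (unary operator)
#
# '&' separates conditions that must all hold; a literal '&' can be escaped as '\&'.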
3633
3634
3635 def match_filter_func(filters):
3636 if not filters:
3637 return None
3638 filters = set(variadic(filters))
3639
3640 interactive = '-' in filters
3641 if interactive:
3642 filters.remove('-')
3643
3644 def _match_func(info_dict, incomplete=False):
3645 if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3646 return NO_DEFAULT if interactive and not incomplete else None
3647 else:
3648 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3649 filter_str = ') | ('.join(map(str.strip, filters))
return f'{video_title} does not pass filter ({filter_str}), skipping ...'
3651 return _match_func
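
# Editor's note: the returned function is what --match-filter uses internally:
#   f = match_filter_func('duration > 60')
#   f({'id': 'x', 'duration': 120})  # -> None (no objection: download)
#   f({'id': 'x', 'duration': 30})   # -> a 'does not pass filter' message (skip)
# A '-' filter makes it return NO_DEFAULT for complete videos, i.e. ask interactively.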
3652
3653
3654 def download_range_func(chapters, ranges):
3655 def inner(info_dict, ydl):
3656 warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3657 else 'Cannot match chapters since chapter information is unavailable')
3658 for regex in chapters or []:
3659 for i, chapter in enumerate(info_dict.get('chapters') or []):
3660 if re.search(regex, chapter['title']):
3661 warning = None
3662 yield {**chapter, 'index': i}
3663 if chapters and warning:
3664 ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3665
3666 yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
3667
3668 return inner
3669
3670
3671 def parse_dfxp_time_expr(time_expr):
3672 if not time_expr:
3673 return
3674
3675 mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3676 if mobj:
3677 return float(mobj.group('time_offset'))
3678
3679 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3680 if mobj:
3681 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
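
# Examples (editor's note; NUMBER_RE is defined earlier in this module):
#   parse_dfxp_time_expr('5.1s')         # -> 5.1
#   parse_dfxp_time_expr('00:01:02.5')   # -> 62.5
#   parse_dfxp_time_expr('00:01:02:12')  # -> 62.12 (frames become a decimal fraction)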
3682
3683
3684 def srt_subtitles_timecode(seconds):
3685 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3686
3687
3688 def ass_subtitles_timecode(seconds):
3689 time = timetuple_from_msec(seconds * 1000)
3690 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
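
# Examples (editor's note; timetuple_from_msec, defined earlier in this module,
# splits milliseconds into (hours, minutes, seconds, milliseconds)):
#   srt_subtitles_timecode(3.5)  # -> '00:00:03,500' (SRT: comma, milliseconds)
#   ass_subtitles_timecode(3.5)  # -> '0:00:03.50'   (ASS: dot, centiseconds)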
3691
3692
3693 def dfxp2srt(dfxp_data):
3694 '''
3695 @param dfxp_data A bytes-like object containing DFXP data
3696 @returns A unicode object containing converted SRT data
3697 '''
3698 LEGACY_NAMESPACES = (
3699 (b'http://www.w3.org/ns/ttml', [
3700 b'http://www.w3.org/2004/11/ttaf1',
3701 b'http://www.w3.org/2006/04/ttaf1',
3702 b'http://www.w3.org/2006/10/ttaf1',
3703 ]),
3704 (b'http://www.w3.org/ns/ttml#styling', [
3705 b'http://www.w3.org/ns/ttml#style',
3706 ]),
3707 )
3708
3709 SUPPORTED_STYLING = [
3710 'color',
3711 'fontFamily',
3712 'fontSize',
3713 'fontStyle',
3714 'fontWeight',
3715 'textDecoration'
3716 ]
3717
3718 _x = functools.partial(xpath_with_ns, ns_map={
3719 'xml': 'http://www.w3.org/XML/1998/namespace',
3720 'ttml': 'http://www.w3.org/ns/ttml',
3721 'tts': 'http://www.w3.org/ns/ttml#styling',
3722 })
3723
3724 styles = {}
3725 default_style = {}
3726
class TTMLPElementParser:
def __init__(self):
# use instance attributes; mutable class attributes would be shared between instances
self._out = ''
self._unclosed_elements = []
self._applied_styles = []
3731
3732 def start(self, tag, attrib):
3733 if tag in (_x('ttml:br'), 'br'):
3734 self._out += '\n'
3735 else:
3736 unclosed_elements = []
3737 style = {}
3738 element_style_id = attrib.get('style')
3739 if default_style:
3740 style.update(default_style)
3741 if element_style_id:
3742 style.update(styles.get(element_style_id, {}))
3743 for prop in SUPPORTED_STYLING:
3744 prop_val = attrib.get(_x('tts:' + prop))
3745 if prop_val:
3746 style[prop] = prop_val
3747 if style:
3748 font = ''
3749 for k, v in sorted(style.items()):
3750 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3751 continue
3752 if k == 'color':
3753 font += ' color="%s"' % v
3754 elif k == 'fontSize':
3755 font += ' size="%s"' % v
3756 elif k == 'fontFamily':
3757 font += ' face="%s"' % v
3758 elif k == 'fontWeight' and v == 'bold':
3759 self._out += '<b>'
3760 unclosed_elements.append('b')
3761 elif k == 'fontStyle' and v == 'italic':
3762 self._out += '<i>'
3763 unclosed_elements.append('i')
3764 elif k == 'textDecoration' and v == 'underline':
3765 self._out += '<u>'
3766 unclosed_elements.append('u')
3767 if font:
3768 self._out += '<font' + font + '>'
3769 unclosed_elements.append('font')
3770 applied_style = {}
3771 if self._applied_styles:
3772 applied_style.update(self._applied_styles[-1])
3773 applied_style.update(style)
3774 self._applied_styles.append(applied_style)
3775 self._unclosed_elements.append(unclosed_elements)
3776
3777 def end(self, tag):
3778 if tag not in (_x('ttml:br'), 'br'):
3779 unclosed_elements = self._unclosed_elements.pop()
3780 for element in reversed(unclosed_elements):
3781 self._out += '</%s>' % element
3782 if unclosed_elements and self._applied_styles:
3783 self._applied_styles.pop()
3784
3785 def data(self, data):
3786 self._out += data
3787
3788 def close(self):
3789 return self._out.strip()
3790
3791 def parse_node(node):
3792 target = TTMLPElementParser()
3793 parser = xml.etree.ElementTree.XMLParser(target=target)
3794 parser.feed(xml.etree.ElementTree.tostring(node))
3795 return parser.close()
3796
3797 for k, v in LEGACY_NAMESPACES:
3798 for ns in v:
3799 dfxp_data = dfxp_data.replace(ns, k)
3800
3801 dfxp = compat_etree_fromstring(dfxp_data)
3802 out = []
3803 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3804
3805 if not paras:
3806 raise ValueError('Invalid dfxp/TTML subtitle')
3807
3808 repeat = False
3809 while True:
3810 for style in dfxp.findall(_x('.//ttml:style')):
3811 style_id = style.get('id') or style.get(_x('xml:id'))
3812 if not style_id:
3813 continue
3814 parent_style_id = style.get('style')
3815 if parent_style_id:
3816 if parent_style_id not in styles:
3817 repeat = True
3818 continue
3819 styles[style_id] = styles[parent_style_id].copy()
3820 for prop in SUPPORTED_STYLING:
3821 prop_val = style.get(_x('tts:' + prop))
3822 if prop_val:
3823 styles.setdefault(style_id, {})[prop] = prop_val
3824 if repeat:
3825 repeat = False
3826 else:
3827 break
3828
3829 for p in ('body', 'div'):
3830 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3831 if ele is None:
3832 continue
3833 style = styles.get(ele.get('style'))
3834 if not style:
3835 continue
3836 default_style.update(style)
3837
3838 for para, index in zip(paras, itertools.count(1)):
3839 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3840 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3841 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3842 if begin_time is None:
3843 continue
3844 if not end_time:
3845 if not dur:
3846 continue
3847 end_time = begin_time + dur
3848 out.append('%d\n%s --> %s\n%s\n\n' % (
3849 index,
3850 srt_subtitles_timecode(begin_time),
3851 srt_subtitles_timecode(end_time),
3852 parse_node(para)))
3853
3854 return ''.join(out)
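
# Minimal round-trip example (editor's illustrative sketch):
#
#   data = (b'<tt xmlns="http://www.w3.org/ns/ttml"><body><div>'
#           b'<p begin="0s" end="1.5s">Hello</p></div></body></tt>')
#   dfxp2srt(data)
#
# returns:
#
#   1
#   00:00:00,000 --> 00:00:01,500
#   Hello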
3855
3856
3857 def cli_option(params, command_option, param, separator=None):
3858 param = params.get(param)
3859 return ([] if param is None
3860 else [command_option, str(param)] if separator is None
3861 else [f'{command_option}{separator}{param}'])
3862
3863
3864 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3865 param = params.get(param)
3866 assert param in (True, False, None)
3867 return cli_option({True: true_value, False: false_value}, command_option, param, separator)
3868
3869
3870 def cli_valueless_option(params, command_option, param, expected_value=True):
3871 return [command_option] if params.get(param) == expected_value else []
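
# Examples (editor's note) of how the cli_* helpers translate option dicts into argv:
#   cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#       # -> ['--proxy', '127.0.0.1:3128']
#   cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#       # -> ['--no-check-certificate', 'true'] (it reuses cli_option on a {True: ..., False: ...} dict)
#   cli_bool_option({'nocheckcertificate': True}, '--check-certificate',
#                   'nocheckcertificate', 'false', 'true', '=')
#       # -> ['--check-certificate=false']
#   cli_valueless_option({'nopart': True}, '--no-part', 'nopart')  # -> ['--no-part']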
3872
3873
3874 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3875 if isinstance(argdict, (list, tuple)): # for backward compatibility
3876 if use_compat:
3877 return argdict
3878 else:
3879 argdict = None
3880 if argdict is None:
3881 return default
3882 assert isinstance(argdict, dict)
3883
3884 assert isinstance(keys, (list, tuple))
3885 for key_list in keys:
3886 arg_list = list(filter(
3887 lambda x: x is not None,
3888 [argdict.get(key.lower()) for key in variadic(key_list)]))
3889 if arg_list:
3890 return [arg for args in arg_list for arg in args]
3891 return default
3892
3893
3894 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3895 main_key, exe = main_key.lower(), exe.lower()
3896 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3897 keys = [f'{root_key}{k}' for k in (keys or [''])]
3898 if root_key in keys:
3899 if main_key != exe:
3900 keys.append((main_key, exe))
3901 keys.append('default')
3902 else:
3903 use_compat = False
3904 return cli_configuration_args(argdict, keys, default, use_compat)
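
# Editor's sketch of the lookup cascade, e.g. for downloader args passed to ffmpeg:
#   argdict = {'ffmpeg': ['-loglevel', 'error'], 'default': ['-hide_banner']}
#   _configuration_args('downloader', argdict, 'ffmpeg')
# tries the keys 'downloader+ffmpeg', then ('downloader', 'ffmpeg'), then 'default',
# and returns ['-loglevel', 'error'] here, the first non-empty match.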
3905
3906
3907 class ISO639Utils:
3908 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3909 _lang_map = {
3910 'aa': 'aar',
3911 'ab': 'abk',
3912 'ae': 'ave',
3913 'af': 'afr',
3914 'ak': 'aka',
3915 'am': 'amh',
3916 'an': 'arg',
3917 'ar': 'ara',
3918 'as': 'asm',
3919 'av': 'ava',
3920 'ay': 'aym',
3921 'az': 'aze',
3922 'ba': 'bak',
3923 'be': 'bel',
3924 'bg': 'bul',
3925 'bh': 'bih',
3926 'bi': 'bis',
3927 'bm': 'bam',
3928 'bn': 'ben',
3929 'bo': 'bod',
3930 'br': 'bre',
3931 'bs': 'bos',
3932 'ca': 'cat',
3933 'ce': 'che',
3934 'ch': 'cha',
3935 'co': 'cos',
3936 'cr': 'cre',
3937 'cs': 'ces',
3938 'cu': 'chu',
3939 'cv': 'chv',
3940 'cy': 'cym',
3941 'da': 'dan',
3942 'de': 'deu',
3943 'dv': 'div',
3944 'dz': 'dzo',
3945 'ee': 'ewe',
3946 'el': 'ell',
3947 'en': 'eng',
3948 'eo': 'epo',
3949 'es': 'spa',
3950 'et': 'est',
3951 'eu': 'eus',
3952 'fa': 'fas',
3953 'ff': 'ful',
3954 'fi': 'fin',
3955 'fj': 'fij',
3956 'fo': 'fao',
3957 'fr': 'fra',
3958 'fy': 'fry',
3959 'ga': 'gle',
3960 'gd': 'gla',
3961 'gl': 'glg',
3962 'gn': 'grn',
3963 'gu': 'guj',
3964 'gv': 'glv',
3965 'ha': 'hau',
3966 'he': 'heb',
3967 'iw': 'heb', # Replaced by he in 1989 revision
3968 'hi': 'hin',
3969 'ho': 'hmo',
3970 'hr': 'hrv',
3971 'ht': 'hat',
3972 'hu': 'hun',
3973 'hy': 'hye',
3974 'hz': 'her',
3975 'ia': 'ina',
3976 'id': 'ind',
3977 'in': 'ind', # Replaced by id in 1989 revision
3978 'ie': 'ile',
3979 'ig': 'ibo',
3980 'ii': 'iii',
3981 'ik': 'ipk',
3982 'io': 'ido',
3983 'is': 'isl',
3984 'it': 'ita',
3985 'iu': 'iku',
3986 'ja': 'jpn',
3987 'jv': 'jav',
3988 'ka': 'kat',
3989 'kg': 'kon',
3990 'ki': 'kik',
3991 'kj': 'kua',
3992 'kk': 'kaz',
3993 'kl': 'kal',
3994 'km': 'khm',
3995 'kn': 'kan',
3996 'ko': 'kor',
3997 'kr': 'kau',
3998 'ks': 'kas',
3999 'ku': 'kur',
4000 'kv': 'kom',
4001 'kw': 'cor',
4002 'ky': 'kir',
4003 'la': 'lat',
4004 'lb': 'ltz',
4005 'lg': 'lug',
4006 'li': 'lim',
4007 'ln': 'lin',
4008 'lo': 'lao',
4009 'lt': 'lit',
4010 'lu': 'lub',
4011 'lv': 'lav',
4012 'mg': 'mlg',
4013 'mh': 'mah',
4014 'mi': 'mri',
4015 'mk': 'mkd',
4016 'ml': 'mal',
4017 'mn': 'mon',
4018 'mr': 'mar',
4019 'ms': 'msa',
4020 'mt': 'mlt',
4021 'my': 'mya',
4022 'na': 'nau',
4023 'nb': 'nob',
4024 'nd': 'nde',
4025 'ne': 'nep',
4026 'ng': 'ndo',
4027 'nl': 'nld',
4028 'nn': 'nno',
4029 'no': 'nor',
4030 'nr': 'nbl',
4031 'nv': 'nav',
4032 'ny': 'nya',
4033 'oc': 'oci',
4034 'oj': 'oji',
4035 'om': 'orm',
4036 'or': 'ori',
4037 'os': 'oss',
4038 'pa': 'pan',
4039 'pi': 'pli',
4040 'pl': 'pol',
4041 'ps': 'pus',
4042 'pt': 'por',
4043 'qu': 'que',
4044 'rm': 'roh',
4045 'rn': 'run',
4046 'ro': 'ron',
4047 'ru': 'rus',
4048 'rw': 'kin',
4049 'sa': 'san',
4050 'sc': 'srd',
4051 'sd': 'snd',
4052 'se': 'sme',
4053 'sg': 'sag',
4054 'si': 'sin',
4055 'sk': 'slk',
4056 'sl': 'slv',
4057 'sm': 'smo',
4058 'sn': 'sna',
4059 'so': 'som',
4060 'sq': 'sqi',
4061 'sr': 'srp',
4062 'ss': 'ssw',
4063 'st': 'sot',
4064 'su': 'sun',
4065 'sv': 'swe',
4066 'sw': 'swa',
4067 'ta': 'tam',
4068 'te': 'tel',
4069 'tg': 'tgk',
4070 'th': 'tha',
4071 'ti': 'tir',
4072 'tk': 'tuk',
4073 'tl': 'tgl',
4074 'tn': 'tsn',
4075 'to': 'ton',
4076 'tr': 'tur',
4077 'ts': 'tso',
4078 'tt': 'tat',
4079 'tw': 'twi',
4080 'ty': 'tah',
4081 'ug': 'uig',
4082 'uk': 'ukr',
4083 'ur': 'urd',
4084 'uz': 'uzb',
4085 've': 'ven',
4086 'vi': 'vie',
4087 'vo': 'vol',
4088 'wa': 'wln',
4089 'wo': 'wol',
4090 'xh': 'xho',
4091 'yi': 'yid',
4092 'ji': 'yid', # Replaced by yi in 1989 revision
4093 'yo': 'yor',
4094 'za': 'zha',
4095 'zh': 'zho',
4096 'zu': 'zul',
4097 }
4098
4099 @classmethod
4100 def short2long(cls, code):
4101 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4102 return cls._lang_map.get(code[:2])
4103
4104 @classmethod
4105 def long2short(cls, code):
4106 """Convert language code from ISO 639-2/T to ISO 639-1"""
4107 for short_name, long_name in cls._lang_map.items():
4108 if long_name == code:
4109 return short_name
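
# Examples (editor's note):
#   ISO639Utils.short2long('en')     # -> 'eng'
#   ISO639Utils.short2long('en-US')  # -> 'eng' (only the first two characters are used)
#   ISO639Utils.long2short('deu')    # -> 'de'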
4110
4111
4112 class ISO3166Utils:
4113 # From http://data.okfn.org/data/core/country-list
4114 _country_map = {
4115 'AF': 'Afghanistan',
4116 'AX': 'Åland Islands',
4117 'AL': 'Albania',
4118 'DZ': 'Algeria',
4119 'AS': 'American Samoa',
4120 'AD': 'Andorra',
4121 'AO': 'Angola',
4122 'AI': 'Anguilla',
4123 'AQ': 'Antarctica',
4124 'AG': 'Antigua and Barbuda',
4125 'AR': 'Argentina',
4126 'AM': 'Armenia',
4127 'AW': 'Aruba',
4128 'AU': 'Australia',
4129 'AT': 'Austria',
4130 'AZ': 'Azerbaijan',
4131 'BS': 'Bahamas',
4132 'BH': 'Bahrain',
4133 'BD': 'Bangladesh',
4134 'BB': 'Barbados',
4135 'BY': 'Belarus',
4136 'BE': 'Belgium',
4137 'BZ': 'Belize',
4138 'BJ': 'Benin',
4139 'BM': 'Bermuda',
4140 'BT': 'Bhutan',
4141 'BO': 'Bolivia, Plurinational State of',
4142 'BQ': 'Bonaire, Sint Eustatius and Saba',
4143 'BA': 'Bosnia and Herzegovina',
4144 'BW': 'Botswana',
4145 'BV': 'Bouvet Island',
4146 'BR': 'Brazil',
4147 'IO': 'British Indian Ocean Territory',
4148 'BN': 'Brunei Darussalam',
4149 'BG': 'Bulgaria',
4150 'BF': 'Burkina Faso',
4151 'BI': 'Burundi',
4152 'KH': 'Cambodia',
4153 'CM': 'Cameroon',
4154 'CA': 'Canada',
4155 'CV': 'Cape Verde',
4156 'KY': 'Cayman Islands',
4157 'CF': 'Central African Republic',
4158 'TD': 'Chad',
4159 'CL': 'Chile',
4160 'CN': 'China',
4161 'CX': 'Christmas Island',
4162 'CC': 'Cocos (Keeling) Islands',
4163 'CO': 'Colombia',
4164 'KM': 'Comoros',
4165 'CG': 'Congo',
4166 'CD': 'Congo, the Democratic Republic of the',
4167 'CK': 'Cook Islands',
4168 'CR': 'Costa Rica',
4169 'CI': 'Côte d\'Ivoire',
4170 'HR': 'Croatia',
4171 'CU': 'Cuba',
4172 'CW': 'Curaçao',
4173 'CY': 'Cyprus',
4174 'CZ': 'Czech Republic',
4175 'DK': 'Denmark',
4176 'DJ': 'Djibouti',
4177 'DM': 'Dominica',
4178 'DO': 'Dominican Republic',
4179 'EC': 'Ecuador',
4180 'EG': 'Egypt',
4181 'SV': 'El Salvador',
4182 'GQ': 'Equatorial Guinea',
4183 'ER': 'Eritrea',
4184 'EE': 'Estonia',
4185 'ET': 'Ethiopia',
4186 'FK': 'Falkland Islands (Malvinas)',
4187 'FO': 'Faroe Islands',
4188 'FJ': 'Fiji',
4189 'FI': 'Finland',
4190 'FR': 'France',
4191 'GF': 'French Guiana',
4192 'PF': 'French Polynesia',
4193 'TF': 'French Southern Territories',
4194 'GA': 'Gabon',
4195 'GM': 'Gambia',
4196 'GE': 'Georgia',
4197 'DE': 'Germany',
4198 'GH': 'Ghana',
4199 'GI': 'Gibraltar',
4200 'GR': 'Greece',
4201 'GL': 'Greenland',
4202 'GD': 'Grenada',
4203 'GP': 'Guadeloupe',
4204 'GU': 'Guam',
4205 'GT': 'Guatemala',
4206 'GG': 'Guernsey',
4207 'GN': 'Guinea',
4208 'GW': 'Guinea-Bissau',
4209 'GY': 'Guyana',
4210 'HT': 'Haiti',
4211 'HM': 'Heard Island and McDonald Islands',
4212 'VA': 'Holy See (Vatican City State)',
4213 'HN': 'Honduras',
4214 'HK': 'Hong Kong',
4215 'HU': 'Hungary',
4216 'IS': 'Iceland',
4217 'IN': 'India',
4218 'ID': 'Indonesia',
4219 'IR': 'Iran, Islamic Republic of',
4220 'IQ': 'Iraq',
4221 'IE': 'Ireland',
4222 'IM': 'Isle of Man',
4223 'IL': 'Israel',
4224 'IT': 'Italy',
4225 'JM': 'Jamaica',
4226 'JP': 'Japan',
4227 'JE': 'Jersey',
4228 'JO': 'Jordan',
4229 'KZ': 'Kazakhstan',
4230 'KE': 'Kenya',
4231 'KI': 'Kiribati',
4232 'KP': 'Korea, Democratic People\'s Republic of',
4233 'KR': 'Korea, Republic of',
4234 'KW': 'Kuwait',
4235 'KG': 'Kyrgyzstan',
4236 'LA': 'Lao People\'s Democratic Republic',
4237 'LV': 'Latvia',
4238 'LB': 'Lebanon',
4239 'LS': 'Lesotho',
4240 'LR': 'Liberia',
4241 'LY': 'Libya',
4242 'LI': 'Liechtenstein',
4243 'LT': 'Lithuania',
4244 'LU': 'Luxembourg',
4245 'MO': 'Macao',
4246 'MK': 'Macedonia, the Former Yugoslav Republic of',
4247 'MG': 'Madagascar',
4248 'MW': 'Malawi',
4249 'MY': 'Malaysia',
4250 'MV': 'Maldives',
4251 'ML': 'Mali',
4252 'MT': 'Malta',
4253 'MH': 'Marshall Islands',
4254 'MQ': 'Martinique',
4255 'MR': 'Mauritania',
4256 'MU': 'Mauritius',
4257 'YT': 'Mayotte',
4258 'MX': 'Mexico',
4259 'FM': 'Micronesia, Federated States of',
4260 'MD': 'Moldova, Republic of',
4261 'MC': 'Monaco',
4262 'MN': 'Mongolia',
4263 'ME': 'Montenegro',
4264 'MS': 'Montserrat',
4265 'MA': 'Morocco',
4266 'MZ': 'Mozambique',
4267 'MM': 'Myanmar',
4268 'NA': 'Namibia',
4269 'NR': 'Nauru',
4270 'NP': 'Nepal',
4271 'NL': 'Netherlands',
4272 'NC': 'New Caledonia',
4273 'NZ': 'New Zealand',
4274 'NI': 'Nicaragua',
4275 'NE': 'Niger',
4276 'NG': 'Nigeria',
4277 'NU': 'Niue',
4278 'NF': 'Norfolk Island',
4279 'MP': 'Northern Mariana Islands',
4280 'NO': 'Norway',
4281 'OM': 'Oman',
4282 'PK': 'Pakistan',
4283 'PW': 'Palau',
4284 'PS': 'Palestine, State of',
4285 'PA': 'Panama',
4286 'PG': 'Papua New Guinea',
4287 'PY': 'Paraguay',
4288 'PE': 'Peru',
4289 'PH': 'Philippines',
4290 'PN': 'Pitcairn',
4291 'PL': 'Poland',
4292 'PT': 'Portugal',
4293 'PR': 'Puerto Rico',
4294 'QA': 'Qatar',
4295 'RE': 'Réunion',
4296 'RO': 'Romania',
4297 'RU': 'Russian Federation',
4298 'RW': 'Rwanda',
4299 'BL': 'Saint Barthélemy',
4300 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4301 'KN': 'Saint Kitts and Nevis',
4302 'LC': 'Saint Lucia',
4303 'MF': 'Saint Martin (French part)',
4304 'PM': 'Saint Pierre and Miquelon',
4305 'VC': 'Saint Vincent and the Grenadines',
4306 'WS': 'Samoa',
4307 'SM': 'San Marino',
4308 'ST': 'Sao Tome and Principe',
4309 'SA': 'Saudi Arabia',
4310 'SN': 'Senegal',
4311 'RS': 'Serbia',
4312 'SC': 'Seychelles',
4313 'SL': 'Sierra Leone',
4314 'SG': 'Singapore',
4315 'SX': 'Sint Maarten (Dutch part)',
4316 'SK': 'Slovakia',
4317 'SI': 'Slovenia',
4318 'SB': 'Solomon Islands',
4319 'SO': 'Somalia',
4320 'ZA': 'South Africa',
4321 'GS': 'South Georgia and the South Sandwich Islands',
4322 'SS': 'South Sudan',
4323 'ES': 'Spain',
4324 'LK': 'Sri Lanka',
4325 'SD': 'Sudan',
4326 'SR': 'Suriname',
4327 'SJ': 'Svalbard and Jan Mayen',
4328 'SZ': 'Swaziland',
4329 'SE': 'Sweden',
4330 'CH': 'Switzerland',
4331 'SY': 'Syrian Arab Republic',
4332 'TW': 'Taiwan, Province of China',
4333 'TJ': 'Tajikistan',
4334 'TZ': 'Tanzania, United Republic of',
4335 'TH': 'Thailand',
4336 'TL': 'Timor-Leste',
4337 'TG': 'Togo',
4338 'TK': 'Tokelau',
4339 'TO': 'Tonga',
4340 'TT': 'Trinidad and Tobago',
4341 'TN': 'Tunisia',
4342 'TR': 'Turkey',
4343 'TM': 'Turkmenistan',
4344 'TC': 'Turks and Caicos Islands',
4345 'TV': 'Tuvalu',
4346 'UG': 'Uganda',
4347 'UA': 'Ukraine',
4348 'AE': 'United Arab Emirates',
4349 'GB': 'United Kingdom',
4350 'US': 'United States',
4351 'UM': 'United States Minor Outlying Islands',
4352 'UY': 'Uruguay',
4353 'UZ': 'Uzbekistan',
4354 'VU': 'Vanuatu',
4355 'VE': 'Venezuela, Bolivarian Republic of',
4356 'VN': 'Viet Nam',
4357 'VG': 'Virgin Islands, British',
4358 'VI': 'Virgin Islands, U.S.',
4359 'WF': 'Wallis and Futuna',
4360 'EH': 'Western Sahara',
4361 'YE': 'Yemen',
4362 'ZM': 'Zambia',
4363 'ZW': 'Zimbabwe',
4364 # Not ISO 3166 codes, but used for IP blocks
4365 'AP': 'Asia/Pacific Region',
4366 'EU': 'Europe',
4367 }
4368
4369 @classmethod
4370 def short2full(cls, code):
4371 """Convert an ISO 3166-2 country code to the corresponding full name"""
4372 return cls._country_map.get(code.upper())
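
# Examples (editor's note):
#   ISO3166Utils.short2full('de')  # -> 'Germany' (lookup is case-insensitive)
#   ISO3166Utils.short2full('XX')  # -> None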
4373
4374
4375 class GeoUtils:
4376 # Major IPv4 address blocks per country
4377 _country_ip_map = {
4378 'AD': '46.172.224.0/19',
4379 'AE': '94.200.0.0/13',
4380 'AF': '149.54.0.0/17',
4381 'AG': '209.59.64.0/18',
4382 'AI': '204.14.248.0/21',
4383 'AL': '46.99.0.0/16',
4384 'AM': '46.70.0.0/15',
4385 'AO': '105.168.0.0/13',
4386 'AP': '182.50.184.0/21',
4387 'AQ': '23.154.160.0/24',
4388 'AR': '181.0.0.0/12',
4389 'AS': '202.70.112.0/20',
4390 'AT': '77.116.0.0/14',
4391 'AU': '1.128.0.0/11',
4392 'AW': '181.41.0.0/18',
4393 'AX': '185.217.4.0/22',
4394 'AZ': '5.197.0.0/16',
4395 'BA': '31.176.128.0/17',
4396 'BB': '65.48.128.0/17',
4397 'BD': '114.130.0.0/16',
4398 'BE': '57.0.0.0/8',
4399 'BF': '102.178.0.0/15',
4400 'BG': '95.42.0.0/15',
4401 'BH': '37.131.0.0/17',
4402 'BI': '154.117.192.0/18',
4403 'BJ': '137.255.0.0/16',
4404 'BL': '185.212.72.0/23',
4405 'BM': '196.12.64.0/18',
4406 'BN': '156.31.0.0/16',
4407 'BO': '161.56.0.0/16',
4408 'BQ': '161.0.80.0/20',
4409 'BR': '191.128.0.0/12',
4410 'BS': '24.51.64.0/18',
4411 'BT': '119.2.96.0/19',
4412 'BW': '168.167.0.0/16',
4413 'BY': '178.120.0.0/13',
4414 'BZ': '179.42.192.0/18',
4415 'CA': '99.224.0.0/11',
4416 'CD': '41.243.0.0/16',
4417 'CF': '197.242.176.0/21',
4418 'CG': '160.113.0.0/16',
4419 'CH': '85.0.0.0/13',
4420 'CI': '102.136.0.0/14',
4421 'CK': '202.65.32.0/19',
4422 'CL': '152.172.0.0/14',
4423 'CM': '102.244.0.0/14',
4424 'CN': '36.128.0.0/10',
4425 'CO': '181.240.0.0/12',
4426 'CR': '201.192.0.0/12',
4427 'CU': '152.206.0.0/15',
4428 'CV': '165.90.96.0/19',
4429 'CW': '190.88.128.0/17',
4430 'CY': '31.153.0.0/16',
4431 'CZ': '88.100.0.0/14',
4432 'DE': '53.0.0.0/8',
4433 'DJ': '197.241.0.0/17',
4434 'DK': '87.48.0.0/12',
4435 'DM': '192.243.48.0/20',
4436 'DO': '152.166.0.0/15',
4437 'DZ': '41.96.0.0/12',
4438 'EC': '186.68.0.0/15',
4439 'EE': '90.190.0.0/15',
4440 'EG': '156.160.0.0/11',
4441 'ER': '196.200.96.0/20',
4442 'ES': '88.0.0.0/11',
4443 'ET': '196.188.0.0/14',
4444 'EU': '2.16.0.0/13',
4445 'FI': '91.152.0.0/13',
4446 'FJ': '144.120.0.0/16',
4447 'FK': '80.73.208.0/21',
4448 'FM': '119.252.112.0/20',
4449 'FO': '88.85.32.0/19',
4450 'FR': '90.0.0.0/9',
4451 'GA': '41.158.0.0/15',
4452 'GB': '25.0.0.0/8',
4453 'GD': '74.122.88.0/21',
4454 'GE': '31.146.0.0/16',
4455 'GF': '161.22.64.0/18',
4456 'GG': '62.68.160.0/19',
4457 'GH': '154.160.0.0/12',
4458 'GI': '95.164.0.0/16',
4459 'GL': '88.83.0.0/19',
4460 'GM': '160.182.0.0/15',
4461 'GN': '197.149.192.0/18',
4462 'GP': '104.250.0.0/19',
4463 'GQ': '105.235.224.0/20',
4464 'GR': '94.64.0.0/13',
4465 'GT': '168.234.0.0/16',
4466 'GU': '168.123.0.0/16',
4467 'GW': '197.214.80.0/20',
4468 'GY': '181.41.64.0/18',
4469 'HK': '113.252.0.0/14',
4470 'HN': '181.210.0.0/16',
4471 'HR': '93.136.0.0/13',
4472 'HT': '148.102.128.0/17',
4473 'HU': '84.0.0.0/14',
4474 'ID': '39.192.0.0/10',
4475 'IE': '87.32.0.0/12',
4476 'IL': '79.176.0.0/13',
4477 'IM': '5.62.80.0/20',
4478 'IN': '117.192.0.0/10',
4479 'IO': '203.83.48.0/21',
4480 'IQ': '37.236.0.0/14',
4481 'IR': '2.176.0.0/12',
4482 'IS': '82.221.0.0/16',
4483 'IT': '79.0.0.0/10',
4484 'JE': '87.244.64.0/18',
4485 'JM': '72.27.0.0/17',
4486 'JO': '176.29.0.0/16',
4487 'JP': '133.0.0.0/8',
4488 'KE': '105.48.0.0/12',
4489 'KG': '158.181.128.0/17',
4490 'KH': '36.37.128.0/17',
4491 'KI': '103.25.140.0/22',
4492 'KM': '197.255.224.0/20',
4493 'KN': '198.167.192.0/19',
4494 'KP': '175.45.176.0/22',
4495 'KR': '175.192.0.0/10',
4496 'KW': '37.36.0.0/14',
4497 'KY': '64.96.0.0/15',
4498 'KZ': '2.72.0.0/13',
4499 'LA': '115.84.64.0/18',
4500 'LB': '178.135.0.0/16',
4501 'LC': '24.92.144.0/20',
4502 'LI': '82.117.0.0/19',
4503 'LK': '112.134.0.0/15',
4504 'LR': '102.183.0.0/16',
4505 'LS': '129.232.0.0/17',
4506 'LT': '78.56.0.0/13',
4507 'LU': '188.42.0.0/16',
4508 'LV': '46.109.0.0/16',
4509 'LY': '41.252.0.0/14',
4510 'MA': '105.128.0.0/11',
4511 'MC': '88.209.64.0/18',
4512 'MD': '37.246.0.0/16',
4513 'ME': '178.175.0.0/17',
4514 'MF': '74.112.232.0/21',
4515 'MG': '154.126.0.0/17',
4516 'MH': '117.103.88.0/21',
4517 'MK': '77.28.0.0/15',
4518 'ML': '154.118.128.0/18',
4519 'MM': '37.111.0.0/17',
4520 'MN': '49.0.128.0/17',
4521 'MO': '60.246.0.0/16',
4522 'MP': '202.88.64.0/20',
4523 'MQ': '109.203.224.0/19',
4524 'MR': '41.188.64.0/18',
4525 'MS': '208.90.112.0/22',
4526 'MT': '46.11.0.0/16',
4527 'MU': '105.16.0.0/12',
4528 'MV': '27.114.128.0/18',
4529 'MW': '102.70.0.0/15',
4530 'MX': '187.192.0.0/11',
4531 'MY': '175.136.0.0/13',
4532 'MZ': '197.218.0.0/15',
4533 'NA': '41.182.0.0/16',
4534 'NC': '101.101.0.0/18',
4535 'NE': '197.214.0.0/18',
4536 'NF': '203.17.240.0/22',
4537 'NG': '105.112.0.0/12',
4538 'NI': '186.76.0.0/15',
4539 'NL': '145.96.0.0/11',
4540 'NO': '84.208.0.0/13',
4541 'NP': '36.252.0.0/15',
4542 'NR': '203.98.224.0/19',
4543 'NU': '49.156.48.0/22',
4544 'NZ': '49.224.0.0/14',
4545 'OM': '5.36.0.0/15',
4546 'PA': '186.72.0.0/15',
4547 'PE': '186.160.0.0/14',
4548 'PF': '123.50.64.0/18',
4549 'PG': '124.240.192.0/19',
4550 'PH': '49.144.0.0/13',
4551 'PK': '39.32.0.0/11',
4552 'PL': '83.0.0.0/11',
4553 'PM': '70.36.0.0/20',
4554 'PR': '66.50.0.0/16',
4555 'PS': '188.161.0.0/16',
4556 'PT': '85.240.0.0/13',
4557 'PW': '202.124.224.0/20',
4558 'PY': '181.120.0.0/14',
4559 'QA': '37.210.0.0/15',
4560 'RE': '102.35.0.0/16',
4561 'RO': '79.112.0.0/13',
4562 'RS': '93.86.0.0/15',
4563 'RU': '5.136.0.0/13',
4564 'RW': '41.186.0.0/16',
4565 'SA': '188.48.0.0/13',
4566 'SB': '202.1.160.0/19',
4567 'SC': '154.192.0.0/11',
4568 'SD': '102.120.0.0/13',
4569 'SE': '78.64.0.0/12',
4570 'SG': '8.128.0.0/10',
4571 'SI': '188.196.0.0/14',
4572 'SK': '78.98.0.0/15',
4573 'SL': '102.143.0.0/17',
4574 'SM': '89.186.32.0/19',
4575 'SN': '41.82.0.0/15',
4576 'SO': '154.115.192.0/18',
4577 'SR': '186.179.128.0/17',
4578 'SS': '105.235.208.0/21',
4579 'ST': '197.159.160.0/19',
4580 'SV': '168.243.0.0/16',
4581 'SX': '190.102.0.0/20',
4582 'SY': '5.0.0.0/16',
4583 'SZ': '41.84.224.0/19',
4584 'TC': '65.255.48.0/20',
4585 'TD': '154.68.128.0/19',
4586 'TG': '196.168.0.0/14',
4587 'TH': '171.96.0.0/13',
4588 'TJ': '85.9.128.0/18',
4589 'TK': '27.96.24.0/21',
4590 'TL': '180.189.160.0/20',
4591 'TM': '95.85.96.0/19',
4592 'TN': '197.0.0.0/11',
4593 'TO': '175.176.144.0/21',
4594 'TR': '78.160.0.0/11',
4595 'TT': '186.44.0.0/15',
4596 'TV': '202.2.96.0/19',
4597 'TW': '120.96.0.0/11',
4598 'TZ': '156.156.0.0/14',
4599 'UA': '37.52.0.0/14',
4600 'UG': '102.80.0.0/13',
4601 'US': '6.0.0.0/8',
4602 'UY': '167.56.0.0/13',
4603 'UZ': '84.54.64.0/18',
4604 'VA': '212.77.0.0/19',
4605 'VC': '207.191.240.0/21',
4606 'VE': '186.88.0.0/13',
4607 'VG': '66.81.192.0/20',
4608 'VI': '146.226.0.0/16',
4609 'VN': '14.160.0.0/11',
4610 'VU': '202.80.32.0/20',
4611 'WF': '117.20.32.0/21',
4612 'WS': '202.4.32.0/19',
4613 'YE': '134.35.0.0/16',
4614 'YT': '41.242.116.0/22',
4615 'ZA': '41.0.0.0/11',
4616 'ZM': '102.144.0.0/13',
4617 'ZW': '102.177.192.0/18',
4618 }
4619
4620 @classmethod
4621 def random_ipv4(cls, code_or_block):
4622 if len(code_or_block) == 2:
4623 block = cls._country_ip_map.get(code_or_block.upper())
4624 if not block:
4625 return None
4626 else:
4627 block = code_or_block
4628 addr, preflen = block.split('/')
4629 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4630 addr_max = addr_min | (0xffffffff >> int(preflen))
4631 return compat_str(socket.inet_ntoa(
4632 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
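
# Examples (editor's note): used to fake a plausible source address for geo-bypass.
#   GeoUtils.random_ipv4('DE')             # random address from Germany's major block
#   GeoUtils.random_ipv4('91.152.0.0/13')  # random address from an explicit CIDR block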
4633
4634
4635 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4636 def __init__(self, proxies=None):
4637 # Set default handlers
4638 for type in ('http', 'https'):
4639 setattr(self, '%s_open' % type,
4640 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4641 meth(r, proxy, type))
4642 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4643
4644 def proxy_open(self, req, proxy, type):
4645 req_proxy = req.headers.get('Ytdl-request-proxy')
4646 if req_proxy is not None:
4647 proxy = req_proxy
4648 del req.headers['Ytdl-request-proxy']
4649
4650 if proxy == '__noproxy__':
4651 return None # No Proxy
4652 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4653 req.add_header('Ytdl-socks-proxy', proxy)
# yt-dlp's http/https handlers do the actual wrapping of the socket with SOCKS
4655 return None
4656 return compat_urllib_request.ProxyHandler.proxy_open(
4657 self, req, proxy, type)
4658
4659
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which has
# been released into the public domain
4662 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4663
4664 def long_to_bytes(n, blocksize=0):
4665 """long_to_bytes(n:long, blocksize:int) : string
4666 Convert a long integer to a byte string.
4667
4668 If optional blocksize is given and greater than zero, pad the front of the
4669 byte string with binary zeros so that the length is a multiple of
4670 blocksize.
4671 """
4672 # after much testing, this algorithm was deemed to be the fastest
4673 s = b''
4674 n = int(n)
4675 while n > 0:
4676 s = compat_struct_pack('>I', n & 0xffffffff) + s
4677 n = n >> 32
4678 # strip off leading zeros
4679 for i in range(len(s)):
4680 if s[i] != b'\000'[0]:
4681 break
4682 else:
4683 # only happens when n == 0
4684 s = b'\000'
4685 i = 0
4686 s = s[i:]
4687 # add back some pad bytes. this could be done more efficiently w.r.t. the
4688 # de-padding being done above, but sigh...
4689 if blocksize > 0 and len(s) % blocksize:
4690 s = (blocksize - len(s) % blocksize) * b'\000' + s
4691 return s
4692
4693
4694 def bytes_to_long(s):
4695 """bytes_to_long(string) : long
4696 Convert a byte string to a long integer.
4697
4698 This is (essentially) the inverse of long_to_bytes().
4699 """
4700 acc = 0
4701 length = len(s)
4702 if length % 4:
4703 extra = (4 - length % 4)
4704 s = b'\000' * extra + s
4705 length = length + extra
4706 for i in range(0, length, 4):
4707 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4708 return acc
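
# Round-trip examples (editor's note):
#   long_to_bytes(65537)               # -> b'\x01\x00\x01'
#   long_to_bytes(65537, blocksize=4)  # -> b'\x00\x01\x00\x01' (front-padded)
#   bytes_to_long(b'\x01\x00\x01')     # -> 65537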
4709
4710
4711 def ohdave_rsa_encrypt(data, exponent, modulus):
4712 '''
4713 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4714
4715 Input:
4716 data: data to encrypt, bytes-like object
4717 exponent, modulus: parameter e and N of RSA algorithm, both integer
4718 Output: hex string of encrypted data
4719
4720 Limitation: supports one block encryption only
4721 '''
4722
4723 payload = int(binascii.hexlify(data[::-1]), 16)
4724 encrypted = pow(payload, exponent, modulus)
4725 return '%x' % encrypted
4726
4727
4728 def pkcs1pad(data, length):
4729 """
4730 Padding input data with PKCS#1 scheme
4731
4732 @param {int[]} data input data
4733 @param {int} length target length
4734 @returns {int[]} padded data
4735 """
4736 if len(data) > length - 11:
4737 raise ValueError('Input data too long for PKCS#1 padding')
4738
pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 requires non-zero padding bytes
4740 return [0, 2] + pseudo_random + [0] + data
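
# Example (editor's note): pkcs1pad([0x02, 0x03], 16) returns a 16-entry list shaped
# [0, 2, <11 random non-zero bytes>, 0, 0x02, 0x03] (EME-PKCS1-v1_5, block type 2).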
4741
4742
4743 def encode_base_n(num, n, table=None):
4744 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4745 if not table:
4746 table = FULL_TABLE[:n]
4747
4748 if n > len(table):
4749 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4750
4751 if num == 0:
4752 return table[0]
4753
4754 ret = ''
4755 while num:
4756 ret = table[num % n] + ret
4757 num = num // n
4758 return ret
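
# Examples (editor's note):
#   encode_base_n(255, 16)  # -> 'ff'
#   encode_base_n(5, 2)     # -> '101'
#   encode_base_n(0, 36)    # -> '0'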
4759
4760
4761 def decode_packed_codes(code):
4762 mobj = re.search(PACKED_CODES_RE, code)
4763 obfuscated_code, base, count, symbols = mobj.groups()
4764 base = int(base)
4765 count = int(count)
4766 symbols = symbols.split('|')
4767 symbol_table = {}
4768
4769 while count:
4770 count -= 1
4771 base_n_count = encode_base_n(count, base)
4772 symbol_table[base_n_count] = symbols[count] or base_n_count
4773
4774 return re.sub(
4775 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4776 obfuscated_code)
4777
4778
4779 def caesar(s, alphabet, shift):
4780 if shift == 0:
4781 return s
4782 l = len(alphabet)
4783 return ''.join(
4784 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4785 for c in s)
4786
4787
4788 def rot47(s):
4789 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
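
# Editor's note: rot47 is a caesar shift of 47 over the 94 printable ASCII characters,
# so applying it twice restores the input:
#   rot47(rot47('yt-dlp')) == 'yt-dlp'  # True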
4790
4791
4792 def parse_m3u8_attributes(attrib):
4793 info = {}
4794 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4795 if val.startswith('"'):
4796 val = val[1:-1]
4797 info[key] = val
4798 return info
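
# Example (editor's note):
#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
#   # -> {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}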
4799
4800
4801 def urshift(val, n):
4802 return val >> n if val >= 0 else (val + 0x100000000) >> n
4803
4804
4805 # Based on png2str() written by @gdkchan and improved by @yokrysty
4806 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4807 def decode_png(png_data):
4808 # Reference: https://www.w3.org/TR/PNG/
4809 header = png_data[8:]
4810
4811 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4812 raise OSError('Not a valid PNG file.')
4813
4814 int_map = {1: '>B', 2: '>H', 4: '>I'}
4815 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4816
4817 chunks = []
4818
4819 while header:
4820 length = unpack_integer(header[:4])
4821 header = header[4:]
4822
4823 chunk_type = header[:4]
4824 header = header[4:]
4825
4826 chunk_data = header[:length]
4827 header = header[length:]
4828
4829 header = header[4:] # Skip CRC
4830
4831 chunks.append({
4832 'type': chunk_type,
4833 'length': length,
4834 'data': chunk_data
4835 })
4836
4837 ihdr = chunks[0]['data']
4838
4839 width = unpack_integer(ihdr[:4])
4840 height = unpack_integer(ihdr[4:8])
4841
4842 idat = b''
4843
4844 for chunk in chunks:
4845 if chunk['type'] == b'IDAT':
4846 idat += chunk['data']
4847
4848 if not idat:
4849 raise OSError('Unable to read PNG data.')
4850
4851 decompressed_data = bytearray(zlib.decompress(idat))
4852
4853 stride = width * 3
4854 pixels = []
4855
4856 def _get_pixel(idx):
4857 x = idx % stride
4858 y = idx // stride
4859 return pixels[y][x]
4860
4861 for y in range(height):
4862 basePos = y * (1 + stride)
4863 filter_type = decompressed_data[basePos]
4864
4865 current_row = []
4866
4867 pixels.append(current_row)
4868
4869 for x in range(stride):
4870 color = decompressed_data[1 + basePos + x]
4871 basex = y * stride + x
4872 left = 0
4873 up = 0
4874
4875 if x > 2:
4876 left = _get_pixel(basex - 3)
4877 if y > 0:
4878 up = _get_pixel(basex - stride)
4879
4880 if filter_type == 1: # Sub
4881 color = (color + left) & 0xff
4882 elif filter_type == 2: # Up
4883 color = (color + up) & 0xff
4884 elif filter_type == 3: # Average
4885 color = (color + ((left + up) >> 1)) & 0xff
4886 elif filter_type == 4: # Paeth
4887 a = left
4888 b = up
4889 c = 0
4890
4891 if x > 2 and y > 0:
4892 c = _get_pixel(basex - stride - 3)
4893
4894 p = a + b - c
4895
4896 pa = abs(p - a)
4897 pb = abs(p - b)
4898 pc = abs(p - c)
4899
4900 if pa <= pb and pa <= pc:
4901 color = (color + a) & 0xff
4902 elif pb <= pc:
4903 color = (color + b) & 0xff
4904 else:
4905 color = (color + c) & 0xff
4906
4907 current_row.append(color)
4908
4909 return width, height, pixels
4910
4911
4912 def write_xattr(path, key, value):
4913 # Windows: Write xattrs to NTFS Alternate Data Streams:
4914 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4915 if compat_os_name == 'nt':
4916 assert ':' not in key
4917 assert os.path.exists(path)
4918
4919 try:
4920 with open(f'{path}:{key}', 'wb') as f:
4921 f.write(value)
4922 except OSError as e:
4923 raise XAttrMetadataError(e.errno, e.strerror)
4924 return
4925
4926 # UNIX Method 1. Use xattrs/pyxattrs modules
4927 from .dependencies import xattr
4928
4929 setxattr = None
4930 if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
4931 # Unicode arguments are not supported in pyxattr until version 0.5.0
4932 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4933 if version_tuple(xattr.__version__) >= (0, 5, 0):
4934 setxattr = xattr.set
4935 elif xattr:
4936 setxattr = xattr.setxattr
4937
4938 if setxattr:
4939 try:
4940 setxattr(path, key, value)
4941 except OSError as e:
4942 raise XAttrMetadataError(e.errno, e.strerror)
4943 return
4944
4945 # UNIX Method 2. Use setfattr/xattr executables
4946 exe = ('setfattr' if check_executable('setfattr', ['--version'])
4947 else 'xattr' if check_executable('xattr', ['-h']) else None)
4948 if not exe:
4949 raise XAttrUnavailableError(
4950 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
4951 + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
4952
4953 value = value.decode()
4954 try:
4955 _, stderr, returncode = Popen.run(
4956 [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
4957 text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4958 except OSError as e:
4959 raise XAttrMetadataError(e.errno, e.strerror)
4960 if returncode:
4961 raise XAttrMetadataError(returncode, stderr)
4962
4963
4964 def random_birthday(year_field, month_field, day_field):
4965 start_date = datetime.date(1950, 1, 1)
4966 end_date = datetime.date(1995, 12, 31)
4967 offset = random.randint(0, (end_date - start_date).days)
4968 random_date = start_date + datetime.timedelta(offset)
4969 return {
4970 year_field: str(random_date.year),
4971 month_field: str(random_date.month),
4972 day_field: str(random_date.day),
4973 }
4974
4975
4976 # Templates for internet shortcut files, which are plain text files.
4977 DOT_URL_LINK_TEMPLATE = '''\
4978 [InternetShortcut]
4979 URL=%(url)s
4980 '''
4981
4982 DOT_WEBLOC_LINK_TEMPLATE = '''\
4983 <?xml version="1.0" encoding="UTF-8"?>
4984 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4985 <plist version="1.0">
4986 <dict>
4987 \t<key>URL</key>
4988 \t<string>%(url)s</string>
4989 </dict>
4990 </plist>
4991 '''
4992
4993 DOT_DESKTOP_LINK_TEMPLATE = '''\
4994 [Desktop Entry]
4995 Encoding=UTF-8
4996 Name=%(filename)s
4997 Type=Link
4998 URL=%(url)s
4999 Icon=text-html
5000 '''
5001
5002 LINK_TEMPLATES = {
5003 'url': DOT_URL_LINK_TEMPLATE,
5004 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5005 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5006 }
5007
5008
5009 def iri_to_uri(iri):
5010 """
5011 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5012
The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding, other than those already escaped, leaving the URI intact.
5014 """
5015
5016 iri_parts = compat_urllib_parse_urlparse(iri)
5017
5018 if '[' in iri_parts.netloc:
raise ValueError('IPv6 URIs are not yet supported.')
# Querying `.netloc` when there's only one bracket also raises a ValueError.
5021
# The `safe` argument values used by the code below list the characters that must not be percent-encoded. Everything else except letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding; anything already percent-encoded is left as is.
5023
5024 net_location = ''
5025 if iri_parts.username:
5026 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5027 if iri_parts.password is not None:
5028 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5029 net_location += '@'
5030
5031 net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
5032 # The 'idna' encoding produces ASCII text.
if iri_parts.port is not None and iri_parts.port != {'http': 80, 'https': 443}.get(iri_parts.scheme):  # keep the port unless it is the scheme's default
5034 net_location += ':' + str(iri_parts.port)
5035
5036 return urllib.parse.urlunparse(
5037 (iri_parts.scheme,
5038 net_location,
5039
5040 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5041
5042 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5043 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5044
5045 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5046 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5047
5048 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5049
5050 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
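
# Example (editor's note):
#   iri_to_uri('https://example.com/ünïcode')  # -> 'https://example.com/%C3%BCn%C3%AFcode'
# Already-escaped sequences and the (IDNA-encoded) hostname are left intact.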
5051
5052
5053 def to_high_limit_path(path):
5054 if sys.platform in ['win32', 'cygwin']:
5055 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5056 return '\\\\?\\' + os.path.abspath(path)
5057
5058 return path
5059
5060
5061 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=None):
5062 val = traverse_obj(obj, *variadic(field))
5063 if (not val and val != 0) if ignore is NO_DEFAULT else val in ignore:
5064 return default
5065 return template % (func(val) if func else val)
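
# Examples (editor's note):
#   format_field({'width': 1920}, 'width', '%dpx')        # -> '1920px'
#   format_field({}, 'width', '%dpx', default='unknown')  # -> 'unknown'
#   format_field({'fps': 0}, 'fps', '%d fps')             # -> '0 fps' (0 is not ignored by default)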
5066
5067
5068 def clean_podcast_url(url):
5069 return re.sub(r'''(?x)
5070 (?:
5071 (?:
5072 chtbl\.com/track|
5073 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5074 play\.podtrac\.com
5075 )/[^/]+|
5076 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5077 flex\.acast\.com|
5078 pd(?:
5079 cn\.co| # https://podcorn.com/analytics-prefix/
5080 st\.fm # https://podsights.com/docs/
5081 )/e
5082 )/''', '', url)
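
# Example (editor's note): strips known tracking/analytics prefixes, e.g.
#   clean_podcast_url('https://chtbl.com/track/12345/traffic.example.com/ep.mp3')
#   # -> 'https://traffic.example.com/ep.mp3'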
5083
5084
5085 _HEX_TABLE = '0123456789abcdef'
5086
5087
5088 def random_uuidv4():
# 'x' is a random hex digit; 'y' must encode the RFC 4122 variant (one of 8, 9, a, b)
return re.sub(r'[xy]', lambda m: _HEX_TABLE[random.randint(0, 15)] if m.group(0) == 'x' else random.choice('89ab'), 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5090
5091
5092 def make_dir(path, to_screen=None):
5093 try:
5094 dn = os.path.dirname(path)
5095 if dn and not os.path.exists(dn):
5096 os.makedirs(dn)
5097 return True
5098 except OSError as err:
if callable(to_screen):  # callable() returns a bool, never None
5100 to_screen('unable to create directory ' + error_to_compat_str(err))
5101 return False
5102
5103
5104 def get_executable_path():
5105 from .update import _get_variant_and_executable_path
5106
5107 return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5108
5109
5110 def load_plugins(name, suffix, namespace):
5111 classes = {}
5112 with contextlib.suppress(FileNotFoundError):
5113 plugins_spec = importlib.util.spec_from_file_location(
5114 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5115 plugins = importlib.util.module_from_spec(plugins_spec)
5116 sys.modules[plugins_spec.name] = plugins
5117 plugins_spec.loader.exec_module(plugins)
for plugin_name in dir(plugins):  # avoid shadowing the 'name' parameter above
if plugin_name in namespace:
continue
if not plugin_name.endswith(suffix):
continue
klass = getattr(plugins, plugin_name)
classes[plugin_name] = namespace[plugin_name] = klass
5125 return classes
5126
5127
5128 def traverse_obj(
5129 obj, *path_list, default=None, expected_type=None, get_all=True,
5130 casesense=True, is_user_input=False, traverse_string=False):
5131 ''' Traverse nested list/dict/tuple
5132 @param path_list A list of paths which are checked one by one.
5133 Each path is a list of keys where each key is a:
5134 - None: Do nothing
5135 - string: A dictionary key
5136 - int: An index into a list
5137 - tuple: A list of keys all of which will be traversed
5138 - Ellipsis: Fetch all values in the object
5139 - Function: Takes the key and value as arguments
5140 and returns whether the key matches or not
5141 @param default Default value to return
5142 @param expected_type Only accept final value of this type (Can also be any callable)
5143 @param get_all Return all the values obtained from a path or only the first one
5144 @param casesense Whether to consider dictionary keys as case sensitive
5145 @param is_user_input Whether the keys are generated from user input. If True,
5146 strings are converted to int/slice if necessary
5147 @param traverse_string Whether to traverse inside strings. If True, any
5148 non-compatible object will also be converted into a string
5149 # TODO: Write tests
5150 '''
5151 if not casesense:
5152 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
5153 path_list = (map(_lower, variadic(path)) for path in path_list)
5154
5155 def _traverse_obj(obj, path, _current_depth=0):
5156 nonlocal depth
5157 path = tuple(variadic(path))
5158 for i, key in enumerate(path):
5159 if None in (key, obj):
5160 return obj
5161 if isinstance(key, (list, tuple)):
5162 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
5163 key = ...
5164 if key is ...:
5165 obj = (obj.values() if isinstance(obj, dict)
5166 else obj if isinstance(obj, (list, tuple, LazyList))
5167 else str(obj) if traverse_string else [])
5168 _current_depth += 1
5169 depth = max(depth, _current_depth)
5170 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
5171 elif callable(key):
5172 if isinstance(obj, (list, tuple, LazyList)):
5173 obj = enumerate(obj)
5174 elif isinstance(obj, dict):
5175 obj = obj.items()
5176 else:
5177 if not traverse_string:
5178 return None
5179 obj = str(obj)
5180 _current_depth += 1
5181 depth = max(depth, _current_depth)
5182 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5183 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5184 obj = (obj.get(key) if casesense or (key in obj)
5185 else next((v for k, v in obj.items() if _lower(k) == key), None))
5186 else:
5187 if is_user_input:
5188 key = (int_or_none(key) if ':' not in key
5189 else slice(*map(int_or_none, key.split(':'))))
5190 if key == slice(None):
5191 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5192 if not isinstance(key, (int, slice)):
5193 return None
5194 if not isinstance(obj, (list, tuple, LazyList)):
5195 if not traverse_string:
5196 return None
5197 obj = str(obj)
5198 try:
5199 obj = obj[key]
5200 except IndexError:
5201 return None
5202 return obj
5203
5204 if isinstance(expected_type, type):
5205 type_test = lambda val: val if isinstance(val, expected_type) else None
5206 elif expected_type is not None:
5207 type_test = expected_type
5208 else:
5209 type_test = lambda val: val
5210
5211 for path in path_list:
5212 depth = 0
5213 val = _traverse_obj(obj, path)
5214 if val is not None:
5215 if depth:
5216 for _ in range(depth - 1):
5217 val = itertools.chain.from_iterable(v for v in val if v is not None)
5218 val = [v for v in map(type_test, val) if v is not None]
5219 if val:
5220 return val if get_all else val[0]
5221 else:
5222 val = type_test(val)
5223 if val is not None:
5224 return val
5225 return default
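
# Examples (editor's illustrative sketch):
#   d = {'formats': [{'url': 'u1'}, {'height': 720, 'url': 'u2'}]}
#   traverse_obj(d, ('formats', 0, 'url'))       # -> 'u1'
#   traverse_obj(d, ('formats', ..., 'height'))  # -> [720] (Ellipsis fans out; Nones are dropped)
#   traverse_obj(d, ('formats', 1, 'width'), default=-1)  # -> -1
#   get_first(d['formats'], 'url')               # -> 'u1' (see get_first below)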
5226
5227
5228 def traverse_dict(dictn, keys, casesense=True):
5229 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5230 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5231 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5232
5233
5234 def get_first(obj, keys, **kwargs):
5235 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5236
5237
5238 def variadic(x, allowed_types=(str, bytes, dict)):
5239 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5240
5241
def decode_base(value, digits):
# Convert the given base-x string to an integer
table = {char: index for index, char in enumerate(digits)}
result = 0
base = len(digits)
for char in value:  # 'char', not 'chr', to avoid shadowing the builtin
result *= base
result += table[char]
return result
5251
5252
5253 def time_seconds(**kwargs):
5254 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5255 return t.timestamp()
5256
5257
# create a JSON Web Signature (JWS) with the HS256 algorithm
# the result is in JWS Compact Serialization format
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5262 def jwt_encode_hs256(payload_data, key, headers={}):
5263 header_data = {
5264 'alg': 'HS256',
5265 'typ': 'JWT',
5266 }
5267 if headers:
5268 header_data.update(headers)
5269 header_b64 = base64.b64encode(json.dumps(header_data).encode())
5270 payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5271 h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5272 signature_b64 = base64.b64encode(h.digest())
5273 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5274 return token
5275
5276
# can be extended in the future to verify the signature, parse the header and return the algorithm used if it's not HS256
5278 def jwt_decode_hs256(jwt):
5279 header_b64, payload_b64, signature_b64 = jwt.split('.')
5280 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5281 return payload_data
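
# Round-trip example (editor's note; jwt_decode_hs256 does NOT verify the signature):
#   token = jwt_encode_hs256({'iss': 'yt-dlp'}, 'secret-key')
#   jwt_decode_hs256(token.decode())  # -> {'iss': 'yt-dlp'}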
5282
5283
5284 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5285
5286
5287 @functools.cache
5288 def supports_terminal_sequences(stream):
5289 if compat_os_name == 'nt':
5290 if not WINDOWS_VT_MODE:
5291 return False
5292 elif not os.getenv('TERM'):
5293 return False
5294 try:
5295 return stream.isatty()
5296 except BaseException:
5297 return False
5298
5299
5300 def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
5301 if get_windows_version() < (10, 0, 10586):
5302 return
5303 global WINDOWS_VT_MODE
5304 try:
5305 Popen.run('', shell=True)
5306 except Exception:
5307 return
5308
5309 WINDOWS_VT_MODE = True
5310 supports_terminal_sequences.cache_clear()
5311
5312
5313 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5314
5315
5316 def remove_terminal_sequences(string):
5317 return _terminal_sequences_re.sub('', string)
5318
5319
5320 def number_of_digits(number):
5321 return len('%d' % number)
5322
5323
5324 def join_nonempty(*values, delim='-', from_dict=None):
5325 if from_dict is not None:
5326 values = map(from_dict.get, values)
5327 return delim.join(map(str, filter(None, values)))
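
# Examples (editor's note):
#   join_nonempty('a', None, '', 'b')  # -> 'a-b'
#   join_nonempty('width', 'height', from_dict={'width': 1920, 'height': 1080}, delim='x')
#   # -> '1920x1080'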
5328
5329
5330 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5331 """
5332 Find the largest format dimensions in terms of video width and, for each thumbnail:
5333 * Modify the URL: Match the width with the provided regex and replace with the former width
5334 * Update dimensions
5335
5336 This function is useful with video services that scale the provided thumbnails on demand
5337 """
5338 _keys = ('width', 'height')
5339 max_dimensions = max(
5340 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5341 default=(0, 0))
5342 if not max_dimensions[0]:
5343 return thumbnails
5344 return [
5345 merge_dicts(
5346 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5347 dict(zip(_keys, max_dimensions)), thumbnail)
5348 for thumbnail in thumbnails
5349 ]
5350
5351
5352 def parse_http_range(range):
5353 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5354 if not range:
5355 return None, None, None
5356 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5357 if not crg:
5358 return None, None, None
5359 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
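
# Examples (editor's note):
#   parse_http_range('bytes=0-499')         # -> (0, 499, None)
#   parse_http_range('bytes 500-999/8000')  # -> (500, 999, 8000)
#   parse_http_range(None)                  # -> (None, None, None)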
5360
5361
5362 def read_stdin(what):
5363 eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5364 write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5365 return sys.stdin
5366
5367
5368 class Config:
5369 own_args = None
5370 parsed_args = None
5371 filename = None
5372 __initialized = False
5373
5374 def __init__(self, parser, label=None):
5375 self.parser, self.label = parser, label
5376 self._loaded_paths, self.configs = set(), []
5377
5378 def init(self, args=None, filename=None):
5379 assert not self.__initialized
5380 directory = ''
5381 if filename:
5382 location = os.path.realpath(filename)
5383 directory = os.path.dirname(location)
5384 if location in self._loaded_paths:
5385 return False
5386 self._loaded_paths.add(location)
5387
5388 self.own_args, self.__initialized = args, True
5389 opts, _ = self.parser.parse_known_args(args)
5390 self.parsed_args, self.filename = args, filename
5391
5392 for location in opts.config_locations or []:
5393 if location == '-':
5394 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5395 continue
5396 location = os.path.join(directory, expand_path(location))
5397 if os.path.isdir(location):
5398 location = os.path.join(location, 'yt-dlp.conf')
5399 if not os.path.exists(location):
5400 self.parser.error(f'config location {location} does not exist')
5401 self.append_config(self.read_file(location), location)
5402 return True
5403
5404 def __str__(self):
5405 label = join_nonempty(
5406 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5407 delim=' ')
5408 return join_nonempty(
5409 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5410 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5411 delim='\n')
5412
5413 @staticmethod
5414 def read_file(filename, default=[]):
5415 try:
5416 optionf = open(filename)
5417 except OSError:
5418 return default # silently skip if file is not present
5419 try:
5420 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5421 contents = optionf.read()
5422 res = shlex.split(contents, comments=True)
5423 finally:
5424 optionf.close()
5425 return res
5426
5427 @staticmethod
5428 def hide_login_info(opts):
5429 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5430 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5431
5432 def _scrub_eq(o):
5433 m = eqre.match(o)
5434 if m:
5435 return m.group('key') + '=PRIVATE'
5436 else:
5437 return o
5438
5439 opts = list(map(_scrub_eq, opts))
5440 for idx, opt in enumerate(opts):
5441 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5442 opts[idx + 1] = 'PRIVATE'
5443 return opts
5444
5445 def append_config(self, *args, label=None):
5446 config = type(self)(self.parser, label)
5447 config._loaded_paths = self._loaded_paths
5448 if config.init(*args):
5449 self.configs.append(config)
5450
5451 @property
5452 def all_args(self):
5453 for config in reversed(self.configs):
5454 yield from config.all_args
5455 yield from self.parsed_args or []
5456
5457 def parse_known_args(self, **kwargs):
5458 return self.parser.parse_known_args(self.all_args, **kwargs)
5459
5460 def parse_args(self):
5461 return self.parser.parse_args(self.all_args)
5462
5463
class WebSocketsWrapper:
5465 """Wraps websockets module to use in non-async scopes"""
5466 pool = None
5467
5468 def __init__(self, url, headers=None, connect=True):
5469 self.loop = asyncio.new_event_loop()
5470 # XXX: "loop" is deprecated
5471 self.conn = websockets.connect(
5472 url, extra_headers=headers, ping_interval=None,
5473 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5474 if connect:
5475 self.__enter__()
5476 atexit.register(self.__exit__, None, None, None)
5477
5478 def __enter__(self):
5479 if not self.pool:
5480 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5481 return self
5482
5483 def send(self, *args):
5484 self.run_with_loop(self.pool.send(*args), self.loop)
5485
5486 def recv(self, *args):
5487 return self.run_with_loop(self.pool.recv(*args), self.loop)
5488
5489 def __exit__(self, type, value, traceback):
5490 try:
5491 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5492 finally:
self._cancel_all_tasks(self.loop)  # cancel pending tasks before the loop is closed
self.loop.close()
5495
5496 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
# for contributors: if any new library that uses asyncio needs to be run in non-async code, move these functions out of this class
5498 @staticmethod
5499 def run_with_loop(main, loop):
5500 if not asyncio.iscoroutine(main):
5501 raise ValueError(f'a coroutine was expected, got {main!r}')
5502
5503 try:
5504 return loop.run_until_complete(main)
5505 finally:
5506 loop.run_until_complete(loop.shutdown_asyncgens())
5507 if hasattr(loop, 'shutdown_default_executor'):
5508 loop.run_until_complete(loop.shutdown_default_executor())
5509
5510 @staticmethod
5511 def _cancel_all_tasks(loop):
5512 to_cancel = asyncio.all_tasks(loop)
5513
5514 if not to_cancel:
5515 return
5516
5517 for task in to_cancel:
5518 task.cancel()
5519
5520 # XXX: "loop" is removed in python 3.10+
5521 loop.run_until_complete(
5522 asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5523
5524 for task in to_cancel:
5525 if task.cancelled():
5526 continue
5527 if task.exception() is not None:
5528 loop.call_exception_handler({
5529 'message': 'unhandled exception during asyncio.run() shutdown',
5530 'exception': task.exception(),
5531 'task': task,
5532 })
5533
5534
5535 def merge_headers(*dicts):
5536 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5537 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
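
# Example (editor's note):
#   merge_headers({'user-agent': 'UA1', 'Accept': '*/*'}, {'User-Agent': 'UA2'})
#   # -> {'User-Agent': 'UA2', 'Accept': '*/*'}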
5538
5539
5540 class classproperty:
5541 """classmethod(property(func)) that works in py < 3.9"""
5542
5543 def __init__(self, func):
5544 functools.update_wrapper(self, func)
5545 self.func = func
5546
5547 def __get__(self, _, cls):
5548 return self.func(cls)
5549
5550
5551 class Namespace(types.SimpleNamespace):
5552 """Immutable namespace"""
5553
5554 def __iter__(self):
5555 return iter(self.__dict__.values())
5556
5557 @property
5558 def items_(self):
5559 return self.__dict__.items()
5560
5561
5562 # Deprecated
5563 has_certifi = bool(certifi)
5564 has_websockets = bool(websockets)