yt_dlp/utils.py

   1 import asyncio
   2 import atexit
   3 import base64
   4 import binascii
   5 import calendar
   6 import codecs
   7 import collections
   8 import collections.abc
   9 import contextlib
  10 import datetime
  11 import email.header
  12 import email.utils
  13 import errno
  14 import gzip
  15 import hashlib
  16 import hmac
  17 import html.entities
  18 import html.parser
  19 import http.client
  20 import http.cookiejar
  21 import importlib.util
  22 import inspect
  23 import io
  24 import itertools
  25 import json
  26 import locale
  27 import math
  28 import mimetypes
  29 import operator
  30 import os
  31 import platform
  32 import random
  33 import re
  34 import shlex
  35 import socket
  36 import ssl
  37 import struct
  38 import subprocess
  39 import sys
  40 import tempfile
  41 import time
  42 import traceback
  43 import types
  44 import unicodedata
  45 import urllib.error
  46 import urllib.parse
  47 import urllib.request
  48 import xml.etree.ElementTree
  49 import zlib
  50
  51 from .compat import functools  # isort: split
  52 from .compat import (
  53     compat_etree_fromstring,
  54     compat_expanduser,
  55     compat_HTMLParseError,
  56     compat_os_name,
  57     compat_shlex_quote,
  58 )
  59 from .dependencies import brotli, certifi, websockets, xattr
  60 from .socks import ProxyType, sockssocket
  61
  62
  63 def register_socks_protocols():
  64     # "Register" SOCKS protocols
  65     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  66     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  67     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  68         if scheme not in urllib.parse.uses_netloc:
  69             urllib.parse.uses_netloc.append(scheme)
  70
  71
  72 # This is not clearly defined otherwise
  73 compiled_regex_type = type(re.compile(''))
  74
  75
  76 def random_user_agent():
  77     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  78     _CHROME_VERSIONS = (
  79         '90.0.4430.212',
  80         '90.0.4430.24',
  81         '90.0.4430.70',
  82         '90.0.4430.72',
  83         '90.0.4430.85',
  84         '90.0.4430.93',
  85         '91.0.4472.101',
  86         '91.0.4472.106',
  87         '91.0.4472.114',
  88         '91.0.4472.124',
  89         '91.0.4472.164',
  90         '91.0.4472.19',
  91         '91.0.4472.77',
  92         '92.0.4515.107',
  93         '92.0.4515.115',
  94         '92.0.4515.131',
  95         '92.0.4515.159',
  96         '92.0.4515.43',
  97         '93.0.4556.0',
  98         '93.0.4577.15',
  99         '93.0.4577.63',
 100         '93.0.4577.82',
 101         '94.0.4606.41',
 102         '94.0.4606.54',
 103         '94.0.4606.61',
 104         '94.0.4606.71',
 105         '94.0.4606.81',
 106         '94.0.4606.85',
 107         '95.0.4638.17',
 108         '95.0.4638.50',
 109         '95.0.4638.54',
 110         '95.0.4638.69',
 111         '95.0.4638.74',
 112         '96.0.4664.18',
 113         '96.0.4664.45',
 114         '96.0.4664.55',
 115         '96.0.4664.93',
 116         '97.0.4692.20',
 117     )
 118     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 119
 120
 121 SUPPORTED_ENCODINGS = [
 122     'gzip', 'deflate'
 123 ]
 124 if brotli:
 125     SUPPORTED_ENCODINGS.append('br')
 126
 127 std_headers = {
 128     'User-Agent': random_user_agent(),
 129     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 130     'Accept-Language': 'en-us,en;q=0.5',
 131     'Sec-Fetch-Mode': 'navigate',
 132 }
 133
 134
 135 USER_AGENTS = {
 136     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
 137 }
 138
 139
 140 NO_DEFAULT = object()
 141 IDENTITY = lambda x: x
 142
 143 ENGLISH_MONTH_NAMES = [
 144     'January', 'February', 'March', 'April', 'May', 'June',
 145     'July', 'August', 'September', 'October', 'November', 'December']
 146
 147 MONTH_NAMES = {
 148     'en': ENGLISH_MONTH_NAMES,
 149     'fr': [
 150         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
 151         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 152 }
 153
 154 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
 155 TIMEZONE_NAMES = {
 156     'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
 157     'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
 158     'EST': -5, 'EDT': -4,  # Eastern
 159     'CST': -6, 'CDT': -5,  # Central
 160     'MST': -7, 'MDT': -6,  # Mountain
 161     'PST': -8, 'PDT': -7   # Pacific
 162 }
 163
 164 # needed for sanitizing filenames in restricted mode
 165 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
 166                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
 167                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 168
 169 DATE_FORMATS = (
 170     '%d %B %Y',
 171     '%d %b %Y',
 172     '%B %d %Y',
 173     '%B %dst %Y',
 174     '%B %dnd %Y',
 175     '%B %drd %Y',
 176     '%B %dth %Y',
 177     '%b %d %Y',
 178     '%b %dst %Y',
 179     '%b %dnd %Y',
 180     '%b %drd %Y',
 181     '%b %dth %Y',
 182     '%b %dst %Y %I:%M',
 183     '%b %dnd %Y %I:%M',
 184     '%b %drd %Y %I:%M',
 185     '%b %dth %Y %I:%M',
 186     '%Y %m %d',
 187     '%Y-%m-%d',
 188     '%Y.%m.%d.',
 189     '%Y/%m/%d',
 190     '%Y/%m/%d %H:%M',
 191     '%Y/%m/%d %H:%M:%S',
 192     '%Y%m%d%H%M',
 193     '%Y%m%d%H%M%S',
 194     '%Y%m%d',
 195     '%Y-%m-%d %H:%M',
 196     '%Y-%m-%d %H:%M:%S',
 197     '%Y-%m-%d %H:%M:%S.%f',
 198     '%Y-%m-%d %H:%M:%S:%f',
 199     '%d.%m.%Y %H:%M',
 200     '%d.%m.%Y %H.%M',
 201     '%Y-%m-%dT%H:%M:%SZ',
 202     '%Y-%m-%dT%H:%M:%S.%fZ',
 203     '%Y-%m-%dT%H:%M:%S.%f0Z',
 204     '%Y-%m-%dT%H:%M:%S',
 205     '%Y-%m-%dT%H:%M:%S.%f',
 206     '%Y-%m-%dT%H:%M',
 207     '%b %d %Y at %H:%M',
 208     '%b %d %Y at %H:%M:%S',
 209     '%B %d %Y at %H:%M',
 210     '%B %d %Y at %H:%M:%S',
 211     '%H:%M %d-%b-%Y',
 212 )
 213
 214 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 215 DATE_FORMATS_DAY_FIRST.extend([
 216     '%d-%m-%Y',
 217     '%d.%m.%Y',
 218     '%d.%m.%y',
 219     '%d/%m/%Y',
 220     '%d/%m/%y',
 221     '%d/%m/%Y %H:%M:%S',
 222     '%d-%m-%Y %H:%M',
 223 ])
 224
 225 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 226 DATE_FORMATS_MONTH_FIRST.extend([
 227     '%m-%d-%Y',
 228     '%m.%d.%Y',
 229     '%m/%d/%Y',
 230     '%m/%d/%y',
 231     '%m/%d/%Y %H:%M:%S',
 232 ])
 233
 234 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 235 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
 236
 237 NUMBER_RE = r'\d+(?:\.\d+)?'
 238
 239
 240 @functools.cache
 241 def preferredencoding():
 242     """Get preferred encoding.
 243
 244     Returns the best encoding scheme for the system, based on
 245     locale.getpreferredencoding() and some further tweaks.
 246     """
 247     try:
 248         pref = locale.getpreferredencoding()
 249         'TEST'.encode(pref)
 250     except Exception:
 251         pref = 'UTF-8'
 252
 253     return pref
 254
 255
 256 def write_json_file(obj, fn):
 257     """ Encode obj as JSON and write it to fn, atomically if possible """
 258
 259     tf = tempfile.NamedTemporaryFile(
 260         prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 261         suffix='.tmp', delete=False, mode='w', encoding='utf-8')
 262
 263     try:
 264         with tf:
 265             json.dump(obj, tf, ensure_ascii=False)
 266         if sys.platform == 'win32':
 267             # Need to remove existing file on Windows, else os.rename raises
 268             # WindowsError or FileExistsError.
 269             with contextlib.suppress(OSError):
 270                 os.unlink(fn)
 271         with contextlib.suppress(OSError):
 272             mask = os.umask(0)
 273             os.umask(mask)
 274             os.chmod(tf.name, 0o666 & ~mask)
 275         os.rename(tf.name, fn)
 276     except Exception:
 277         with contextlib.suppress(OSError):
 278             os.remove(tf.name)
 279         raise
 280
 281
 282 def find_xpath_attr(node, xpath, key, val=None):
 283     """ Find the xpath xpath[@key=val] """
 284     assert re.match(r'^[a-zA-Z_-]+$', key)
 285     expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
 286     return node.find(expr)
 287
 288 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 289 # the namespace parameter
 290
 291
 292 def xpath_with_ns(path, ns_map):
 293     components = [c.split(':') for c in path.split('/')]
 294     replaced = []
 295     for c in components:
 296         if len(c) == 1:
 297             replaced.append(c[0])
 298         else:
 299             ns, tag = c
 300             replaced.append('{%s}%s' % (ns_map[ns], tag))
 301     return '/'.join(replaced)
 302
 303
 304 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 305     def _find_xpath(xpath):
 306         return node.find(xpath)
 307
 308     if isinstance(xpath, str):
 309         n = _find_xpath(xpath)
 310     else:
 311         for xp in xpath:
 312             n = _find_xpath(xp)
 313             if n is not None:
 314                 break
 315
 316     if n is None:
 317         if default is not NO_DEFAULT:
 318             return default
 319         elif fatal:
 320             name = xpath if name is None else name
 321             raise ExtractorError('Could not find XML element %s' % name)
 322         else:
 323             return None
 324     return n
 325
 326
 327 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 328     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 329     if n is None or n == default:
 330         return n
 331     if n.text is None:
 332         if default is not NO_DEFAULT:
 333             return default
 334         elif fatal:
 335             name = xpath if name is None else name
 336             raise ExtractorError('Could not find XML element\'s text %s' % name)
 337         else:
 338             return None
 339     return n.text
 340
 341
 342 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 343     n = find_xpath_attr(node, xpath, key)
 344     if n is None:
 345         if default is not NO_DEFAULT:
 346             return default
 347         elif fatal:
 348             name = f'{xpath}[@{key}]' if name is None else name
 349             raise ExtractorError('Could not find XML attribute %s' % name)
 350         else:
 351             return None
 352     return n.attrib[key]
 353
 354
 355 def get_element_by_id(id, html, **kwargs):
 356     """Return the content of the tag with the specified ID in the passed HTML document"""
 357     return get_element_by_attribute('id', id, html, **kwargs)
 358
 359
 360 def get_element_html_by_id(id, html, **kwargs):
 361     """Return the html of the tag with the specified ID in the passed HTML document"""
 362     return get_element_html_by_attribute('id', id, html, **kwargs)
 363
 364
 365 def get_element_by_class(class_name, html):
 366     """Return the content of the first tag with the specified class in the passed HTML document"""
 367     retval = get_elements_by_class(class_name, html)
 368     return retval[0] if retval else None
 369
 370
 371 def get_element_html_by_class(class_name, html):
 372     """Return the html of the first tag with the specified class in the passed HTML document"""
 373     retval = get_elements_html_by_class(class_name, html)
 374     return retval[0] if retval else None
 375
 376
 377 def get_element_by_attribute(attribute, value, html, **kwargs):
 378     retval = get_elements_by_attribute(attribute, value, html, **kwargs)
 379     return retval[0] if retval else None
 380
 381
 382 def get_element_html_by_attribute(attribute, value, html, **kargs):
 383     retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
 384     return retval[0] if retval else None
 385
 386
 387 def get_elements_by_class(class_name, html, **kargs):
 388     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 389     return get_elements_by_attribute(
 390         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 391         html, escape_value=False)
 392
 393
 394 def get_elements_html_by_class(class_name, html):
 395     """Return the html of all tags with the specified class in the passed HTML document as a list"""
 396     return get_elements_html_by_attribute(
 397         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 398         html, escape_value=False)
 399
 400
 401 def get_elements_by_attribute(*args, **kwargs):
 402     """Return the content of the tag with the specified attribute in the passed HTML document"""
 403     return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 404
 405
 406 def get_elements_html_by_attribute(*args, **kwargs):
 407     """Return the html of the tag with the specified attribute in the passed HTML document"""
 408     return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 409
 410
 411 def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
 412     """
 413     Return the text (content) and the html (whole) of the tag with the specified
 414     attribute in the passed HTML document
 415     """
 416
 417     quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
 418
 419     value = re.escape(value) if escape_value else value
 420
 421     partial_element_re = rf'''(?x)
 422         <(?P<tag>{tag})
 423          (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
 424          \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
 425         '''
 426
 427     for m in re.finditer(partial_element_re, html):
 428         content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
 429
 430         yield (
 431             unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
 432             whole
 433         )
 434
 435
 436 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
 437     """
 438     HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
 439     closing tag for the first opening tag it has encountered, and can be used
 440     as a context manager
 441     """
 442
 443     class HTMLBreakOnClosingTagException(Exception):
 444         pass
 445
 446     def __init__(self):
 447         self.tagstack = collections.deque()
 448         html.parser.HTMLParser.__init__(self)
 449
 450     def __enter__(self):
 451         return self
 452
 453     def __exit__(self, *_):
 454         self.close()
 455
 456     def close(self):
 457         # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
 458         # so data remains buffered; we no longer have any interest in it, thus
 459         # override this method to discard it
 460         pass
 461
 462     def handle_starttag(self, tag, _):
 463         self.tagstack.append(tag)
 464
 465     def handle_endtag(self, tag):
 466         if not self.tagstack:
 467             raise compat_HTMLParseError('no tags in the stack')
 468         while self.tagstack:
 469             inner_tag = self.tagstack.pop()
 470             if inner_tag == tag:
 471                 break
 472         else:
 473             raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
 474         if not self.tagstack:
 475             raise self.HTMLBreakOnClosingTagException()
 476
 477
 478 def get_element_text_and_html_by_tag(tag, html):
 479     """
 480     For the first element with the specified tag in the passed HTML document
 481     return its' content (text) and the whole element (html)
 482     """
 483     def find_or_raise(haystack, needle, exc):
 484         try:
 485             return haystack.index(needle)
 486         except ValueError:
 487             raise exc
 488     closing_tag = f'</{tag}>'
 489     whole_start = find_or_raise(
 490         html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
 491     content_start = find_or_raise(
 492         html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
 493     content_start += whole_start + 1
 494     with HTMLBreakOnClosingTagParser() as parser:
 495         parser.feed(html[whole_start:content_start])
 496         if not parser.tagstack or parser.tagstack[0] != tag:
 497             raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
 498         offset = content_start
 499         while offset < len(html):
 500             next_closing_tag_start = find_or_raise(
 501                 html[offset:], closing_tag,
 502                 compat_HTMLParseError(f'closing {tag} tag not found'))
 503             next_closing_tag_end = next_closing_tag_start + len(closing_tag)
 504             try:
 505                 parser.feed(html[offset:offset + next_closing_tag_end])
 506                 offset += next_closing_tag_end
 507             except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
 508                 return html[content_start:offset + next_closing_tag_start], \
 509                     html[whole_start:offset + next_closing_tag_end]
 510         raise compat_HTMLParseError('unexpected end of html')
 511
 512
 513 class HTMLAttributeParser(html.parser.HTMLParser):
 514     """Trivial HTML parser to gather the attributes for a single element"""
 515
 516     def __init__(self):
 517         self.attrs = {}
 518         html.parser.HTMLParser.__init__(self)
 519
 520     def handle_starttag(self, tag, attrs):
 521         self.attrs = dict(attrs)
 522
 523
 524 class HTMLListAttrsParser(html.parser.HTMLParser):
 525     """HTML parser to gather the attributes for the elements of a list"""
 526
 527     def __init__(self):
 528         html.parser.HTMLParser.__init__(self)
 529         self.items = []
 530         self._level = 0
 531
 532     def handle_starttag(self, tag, attrs):
 533         if tag == 'li' and self._level == 0:
 534             self.items.append(dict(attrs))
 535         self._level += 1
 536
 537     def handle_endtag(self, tag):
 538         self._level -= 1
 539
 540
 541 def extract_attributes(html_element):
 542     """Given a string for an HTML element such as
 543     <el
 544          a="foo" B="bar" c="&98;az" d=boz
 545          empty= noval entity="&amp;"
 546          sq='"' dq="'"
 547     >
 548     Decode and return a dictionary of attributes.
 549     {
 550         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 551         'empty': '', 'noval': None, 'entity': '&',
 552         'sq': '"', 'dq': '\''
 553     }.
 554     """
 555     parser = HTMLAttributeParser()
 556     with contextlib.suppress(compat_HTMLParseError):
 557         parser.feed(html_element)
 558         parser.close()
 559     return parser.attrs
 560
 561
 562 def parse_list(webpage):
 563     """Given a string for an series of HTML <li> elements,
 564     return a dictionary of their attributes"""
 565     parser = HTMLListAttrsParser()
 566     parser.feed(webpage)
 567     parser.close()
 568     return parser.items
 569
 570
 571 def clean_html(html):
 572     """Clean an HTML snippet into a readable string"""
 573
 574     if html is None:  # Convenience for sanitizing descriptions etc.
 575         return html
 576
 577     html = re.sub(r'\s+', ' ', html)
 578     html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
 579     html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
 580     # Strip html tags
 581     html = re.sub('<.*?>', '', html)
 582     # Replace html entities
 583     html = unescapeHTML(html)
 584     return html.strip()
 585
 586
 587 class LenientJSONDecoder(json.JSONDecoder):
 588     def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
 589         self.transform_source, self.ignore_extra = transform_source, ignore_extra
 590         super().__init__(*args, **kwargs)
 591
 592     def decode(self, s):
 593         if self.transform_source:
 594             s = self.transform_source(s)
 595         try:
 596             if self.ignore_extra:
 597                 return self.raw_decode(s.lstrip())[0]
 598             return super().decode(s)
 599         except json.JSONDecodeError as e:
 600             if e.pos is not None:
 601                 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
 602             raise
 603
 604
 605 def sanitize_open(filename, open_mode):
 606     """Try to open the given filename, and slightly tweak it if this fails.
 607
 608     Attempts to open the given filename. If this fails, it tries to change
 609     the filename slightly, step by step, until it's either able to open it
 610     or it fails and raises a final exception, like the standard open()
 611     function.
 612
 613     It returns the tuple (stream, definitive_file_name).
 614     """
 615     if filename == '-':
 616         if sys.platform == 'win32':
 617             import msvcrt
 618
 619             # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
 620             with contextlib.suppress(io.UnsupportedOperation):
 621                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 622         return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 623
 624     for attempt in range(2):
 625         try:
 626             try:
 627                 if sys.platform == 'win32':
 628                     # FIXME: An exclusive lock also locks the file from being read.
 629                     # Since windows locks are mandatory, don't lock the file on windows (for now).
 630                     # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
 631                     raise LockingUnsupportedError()
 632                 stream = locked_file(filename, open_mode, block=False).__enter__()
 633             except OSError:
 634                 stream = open(filename, open_mode)
 635             return stream, filename
 636         except OSError as err:
 637             if attempt or err.errno in (errno.EACCES,):
 638                 raise
 639             old_filename, filename = filename, sanitize_path(filename)
 640             if old_filename == filename:
 641                 raise
 642
 643
 644 def timeconvert(timestr):
 645     """Convert RFC 2822 defined time string into system timestamp"""
 646     timestamp = None
 647     timetuple = email.utils.parsedate_tz(timestr)
 648     if timetuple is not None:
 649         timestamp = email.utils.mktime_tz(timetuple)
 650     return timestamp
 651
 652
 653 def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
 654     """Sanitizes a string so it could be used as part of a filename.
 655     @param restricted   Use a stricter subset of allowed characters
 656     @param is_id        Whether this is an ID that should be kept unchanged if possible.
 657                         If unset, yt-dlp's new sanitization rules are in effect
 658     """
 659     if s == '':
 660         return ''
 661
 662     def replace_insane(char):
 663         if restricted and char in ACCENT_CHARS:
 664             return ACCENT_CHARS[char]
 665         elif not restricted and char == '\n':
 666             return '\0 '
 667         elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
 668             # Replace with their full-width unicode counterparts
 669             return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
 670         elif char == '?' or ord(char) < 32 or ord(char) == 127:
 671             return ''
 672         elif char == '"':
 673             return '' if restricted else '\''
 674         elif char == ':':
 675             return '\0_\0-' if restricted else '\0 \0-'
 676         elif char in '\\/|*<>':
 677             return '\0_'
 678         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
 679             return '\0_'
 680         return char
 681
 682     if restricted and is_id is NO_DEFAULT:
 683         s = unicodedata.normalize('NFKC', s)
 684     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
 685     result = ''.join(map(replace_insane, s))
 686     if is_id is NO_DEFAULT:
 687         result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
 688         STRIP_RE = r'(?:\0.|[ _-])*'
 689         result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
 690     result = result.replace('\0', '') or '_'
 691
 692     if not is_id:
 693         while '__' in result:
 694             result = result.replace('__', '_')
 695         result = result.strip('_')
 696         # Common case of "Foreign band name - English song title"
 697         if restricted and result.startswith('-_'):
 698             result = result[2:]
 699         if result.startswith('-'):
 700             result = '_' + result[len('-'):]
 701         result = result.lstrip('.')
 702         if not result:
 703             result = '_'
 704     return result
 705
 706
 707 def sanitize_path(s, force=False):
 708     """Sanitizes and normalizes path on Windows"""
 709     if sys.platform == 'win32':
 710         force = False
 711         drive_or_unc, _ = os.path.splitdrive(s)
 712     elif force:
 713         drive_or_unc = ''
 714     else:
 715         return s
 716
 717     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 718     if drive_or_unc:
 719         norm_path.pop(0)
 720     sanitized_path = [
 721         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 722         for path_part in norm_path]
 723     if drive_or_unc:
 724         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 725     elif force and s and s[0] == os.path.sep:
 726         sanitized_path.insert(0, os.path.sep)
 727     return os.path.join(*sanitized_path)
 728
 729
 730 def sanitize_url(url, *, scheme='http'):
 731     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 732     # the number of unwanted failures due to missing protocol
 733     if url is None:
 734         return
 735     elif url.startswith('//'):
 736         return f'{scheme}:{url}'
 737     # Fix some common typos seen so far
 738     COMMON_TYPOS = (
 739         # https://github.com/ytdl-org/youtube-dl/issues/15649
 740         (r'^httpss://', r'https://'),
 741         # https://bx1.be/lives/direct-tv/
 742         (r'^rmtp([es]?)://', r'rtmp\1://'),
 743     )
 744     for mistake, fixup in COMMON_TYPOS:
 745         if re.match(mistake, url):
 746             return re.sub(mistake, fixup, url)
 747     return url
 748
 749
 750 def extract_basic_auth(url):
 751     parts = urllib.parse.urlsplit(url)
 752     if parts.username is None:
 753         return url, None
 754     url = urllib.parse.urlunsplit(parts._replace(netloc=(
 755         parts.hostname if parts.port is None
 756         else '%s:%d' % (parts.hostname, parts.port))))
 757     auth_payload = base64.b64encode(
 758         ('%s:%s' % (parts.username, parts.password or '')).encode())
 759     return url, f'Basic {auth_payload.decode()}'
 760
 761
 762 def sanitized_Request(url, *args, **kwargs):
 763     url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
 764     if auth_header is not None:
 765         headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
 766         headers['Authorization'] = auth_header
 767     return urllib.request.Request(url, *args, **kwargs)
 768
 769
 770 def expand_path(s):
 771     """Expand shell variables and ~"""
 772     return os.path.expandvars(compat_expanduser(s))
 773
 774
 775 def orderedSet(iterable, *, lazy=False):
 776     """Remove all duplicates from the input iterable"""
 777     def _iter():
 778         seen = []  # Do not use set since the items can be unhashable
 779         for x in iterable:
 780             if x not in seen:
 781                 seen.append(x)
 782                 yield x
 783
 784     return _iter() if lazy else list(_iter())
 785
 786
 787 def _htmlentity_transform(entity_with_semicolon):
 788     """Transforms an HTML entity to a character."""
 789     entity = entity_with_semicolon[:-1]
 790
 791     # Known non-numeric HTML entity
 792     if entity in html.entities.name2codepoint:
 793         return chr(html.entities.name2codepoint[entity])
 794
 795     # TODO: HTML5 allows entities without a semicolon.
 796     # E.g. '&Eacuteric' should be decoded as 'Éric'.
 797     if entity_with_semicolon in html.entities.html5:
 798         return html.entities.html5[entity_with_semicolon]
 799
 800     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 801     if mobj is not None:
 802         numstr = mobj.group(1)
 803         if numstr.startswith('x'):
 804             base = 16
 805             numstr = '0%s' % numstr
 806         else:
 807             base = 10
 808         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 809         with contextlib.suppress(ValueError):
 810             return chr(int(numstr, base))
 811
 812     # Unknown entity in name, return its literal representation
 813     return '&%s;' % entity
 814
 815
 816 def unescapeHTML(s):
 817     if s is None:
 818         return None
 819     assert isinstance(s, str)
 820
 821     return re.sub(
 822         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 823
 824
 825 def escapeHTML(text):
 826     return (
 827         text
 828         .replace('&', '&amp;')
 829         .replace('<', '&lt;')
 830         .replace('>', '&gt;')
 831         .replace('"', '&quot;')
 832         .replace("'", '&#39;')
 833     )
 834
 835
 836 def process_communicate_or_kill(p, *args, **kwargs):
 837     deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
 838                         f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
 839     return Popen.communicate_or_kill(p, *args, **kwargs)
 840
 841
 842 class Popen(subprocess.Popen):
 843     if sys.platform == 'win32':
 844         _startupinfo = subprocess.STARTUPINFO()
 845         _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 846     else:
 847         _startupinfo = None
 848
 849     @staticmethod
 850     def _fix_pyinstaller_ld_path(env):
 851         """Restore LD_LIBRARY_PATH when using PyInstaller
 852             Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
 853                  https://github.com/yt-dlp/yt-dlp/issues/4573
 854         """
 855         if not hasattr(sys, '_MEIPASS'):
 856             return
 857
 858         def _fix(key):
 859             orig = env.get(f'{key}_ORIG')
 860             if orig is None:
 861                 env.pop(key, None)
 862             else:
 863                 env[key] = orig
 864
 865         _fix('LD_LIBRARY_PATH')  # Linux
 866         _fix('DYLD_LIBRARY_PATH')  # macOS
 867
 868     def __init__(self, *args, env=None, text=False, **kwargs):
 869         if env is None:
 870             env = os.environ.copy()
 871         self._fix_pyinstaller_ld_path(env)
 872
 873         if text is True:
 874             kwargs['universal_newlines'] = True  # For 3.6 compatibility
 875             kwargs.setdefault('encoding', 'utf-8')
 876             kwargs.setdefault('errors', 'replace')
 877         super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
 878
 879     def communicate_or_kill(self, *args, **kwargs):
 880         try:
 881             return self.communicate(*args, **kwargs)
 882         except BaseException:  # Including KeyboardInterrupt
 883             self.kill(timeout=None)
 884             raise
 885
 886     def kill(self, *, timeout=0):
 887         super().kill()
 888         if timeout != 0:
 889             self.wait(timeout=timeout)
 890
 891     @classmethod
 892     def run(cls, *args, timeout=None, **kwargs):
 893         with cls(*args, **kwargs) as proc:
 894             default = '' if proc.text_mode else b''
 895             stdout, stderr = proc.communicate_or_kill(timeout=timeout)
 896             return stdout or default, stderr or default, proc.returncode
 897
 898
 899 def get_subprocess_encoding():
 900     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 901         # For subprocess calls, encode with locale encoding
 902         # Refer to http://stackoverflow.com/a/9951851/35070
 903         encoding = preferredencoding()
 904     else:
 905         encoding = sys.getfilesystemencoding()
 906     if encoding is None:
 907         encoding = 'utf-8'
 908     return encoding
 909
 910
 911 def encodeFilename(s, for_subprocess=False):
 912     assert isinstance(s, str)
 913     return s
 914
 915
 916 def decodeFilename(b, for_subprocess=False):
 917     return b
 918
 919
 920 def encodeArgument(s):
 921     # Legacy code that uses byte strings
 922     # Uncomment the following line after fixing all post processors
 923     # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
 924     return s if isinstance(s, str) else s.decode('ascii')
 925
 926
 927 def decodeArgument(b):
 928     return b
 929
 930
 931 def decodeOption(optval):
 932     if optval is None:
 933         return optval
 934     if isinstance(optval, bytes):
 935         optval = optval.decode(preferredencoding())
 936
 937     assert isinstance(optval, str)
 938     return optval
 939
 940
 941 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 942
 943
 944 def timetuple_from_msec(msec):
 945     secs, msec = divmod(msec, 1000)
 946     mins, secs = divmod(secs, 60)
 947     hrs, mins = divmod(mins, 60)
 948     return _timetuple(hrs, mins, secs, msec)
 949
 950
 951 def formatSeconds(secs, delim=':', msec=False):
 952     time = timetuple_from_msec(secs * 1000)
 953     if time.hours:
 954         ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 955     elif time.minutes:
 956         ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
 957     else:
 958         ret = '%d' % time.seconds
 959     return '%s.%03d' % (ret, time.milliseconds) if msec else ret
 960
 961
 962 def _ssl_load_windows_store_certs(ssl_context, storename):
 963     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
 964     try:
 965         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
 966                  if encoding == 'x509_asn' and (
 967                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
 968     except PermissionError:
 969         return
 970     for cert in certs:
 971         with contextlib.suppress(ssl.SSLError):
 972             ssl_context.load_verify_locations(cadata=cert)
 973
 974
 975 def make_HTTPS_handler(params, **kwargs):
 976     opts_check_certificate = not params.get('nocheckcertificate')
 977     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 978     context.check_hostname = opts_check_certificate
 979     if params.get('legacyserverconnect'):
 980         context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
 981         # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
 982         context.set_ciphers('DEFAULT')
 983
 984     context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
 985     if opts_check_certificate:
 986         if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
 987             context.load_verify_locations(cafile=certifi.where())
 988         else:
 989             try:
 990                 context.load_default_certs()
 991                 # Work around the issue in load_default_certs when there are bad certificates. See:
 992                 # https://github.com/yt-dlp/yt-dlp/issues/1060,
 993                 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
 994             except ssl.SSLError:
 995                 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
 996                 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
 997                     for storename in ('CA', 'ROOT'):
 998                         _ssl_load_windows_store_certs(context, storename)
 999                 context.set_default_verify_paths()
1000
1001     client_certfile = params.get('client_certificate')
1002     if client_certfile:
1003         try:
1004             context.load_cert_chain(
1005                 client_certfile, keyfile=params.get('client_certificate_key'),
1006                 password=params.get('client_certificate_password'))
1007         except ssl.SSLError:
1008             raise YoutubeDLError('Unable to load client certificate')
1009
1010     # Some servers may reject requests if ALPN extension is not sent. See:
1011     # https://github.com/python/cpython/issues/85140
1012     # https://github.com/yt-dlp/yt-dlp/issues/3878
1013     with contextlib.suppress(NotImplementedError):
1014         context.set_alpn_protocols(['http/1.1'])
1015
1016     return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
1017
1018
1019 def bug_reports_message(before=';'):
1020     from .update import REPOSITORY
1021
1022     msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
1023            'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')
1024
1025     before = before.rstrip()
1026     if not before or before.endswith(('.', '!', '?')):
1027         msg = msg[0].title() + msg[1:]
1028
1029     return (before + ' ' if before else '') + msg
1030
1031
1032 class YoutubeDLError(Exception):
1033     """Base exception for YoutubeDL errors."""
1034     msg = None
1035
1036     def __init__(self, msg=None):
1037         if msg is not None:
1038             self.msg = msg
1039         elif self.msg is None:
1040             self.msg = type(self).__name__
1041         super().__init__(self.msg)
1042
1043
1044 network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
1045 if hasattr(ssl, 'CertificateError'):
1046     network_exceptions.append(ssl.CertificateError)
1047 network_exceptions = tuple(network_exceptions)
1048
1049
1050 class ExtractorError(YoutubeDLError):
1051     """Error during info extraction."""
1052
1053     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
1054         """ tb, if given, is the original traceback (so that it can be printed out).
1055         If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1056         """
1057         if sys.exc_info()[0] in network_exceptions:
1058             expected = True
1059
1060         self.orig_msg = str(msg)
1061         self.traceback = tb
1062         self.expected = expected
1063         self.cause = cause
1064         self.video_id = video_id
1065         self.ie = ie
1066         self.exc_info = sys.exc_info()  # preserve original exception
1067         if isinstance(self.exc_info[1], ExtractorError):
1068             self.exc_info = self.exc_info[1].exc_info
1069
1070         super().__init__(''.join((
1071             format_field(ie, None, '[%s] '),
1072             format_field(video_id, None, '%s: '),
1073             msg,
1074             format_field(cause, None, ' (caused by %r)'),
1075             '' if expected else bug_reports_message())))
1076
1077     def format_traceback(self):
1078         return join_nonempty(
1079             self.traceback and ''.join(traceback.format_tb(self.traceback)),
1080             self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1081             delim='\n') or None
1082
1083
1084 class UnsupportedError(ExtractorError):
1085     def __init__(self, url):
1086         super().__init__(
1087             'Unsupported URL: %s' % url, expected=True)
1088         self.url = url
1089
1090
1091 class RegexNotFoundError(ExtractorError):
1092     """Error when a regex didn't match"""
1093     pass
1094
1095
1096 class GeoRestrictedError(ExtractorError):
1097     """Geographic restriction Error exception.
1098
1099     This exception may be thrown when a video is not available from your
1100     geographic location due to geographic restrictions imposed by a website.
1101     """
1102
1103     def __init__(self, msg, countries=None, **kwargs):
1104         kwargs['expected'] = True
1105         super().__init__(msg, **kwargs)
1106         self.countries = countries
1107
1108
1109 class UserNotLive(ExtractorError):
1110     """Error when a channel/user is not live"""
1111
1112     def __init__(self, msg=None, **kwargs):
1113         kwargs['expected'] = True
1114         super().__init__(msg or 'The channel is not currently live', **kwargs)
1115
1116
1117 class DownloadError(YoutubeDLError):
1118     """Download Error exception.
1119
1120     This exception may be thrown by FileDownloader objects if they are not
1121     configured to continue on errors. They will contain the appropriate
1122     error message.
1123     """
1124
1125     def __init__(self, msg, exc_info=None):
1126         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1127         super().__init__(msg)
1128         self.exc_info = exc_info
1129
1130
1131 class EntryNotInPlaylist(YoutubeDLError):
1132     """Entry not in playlist exception.
1133
1134     This exception will be thrown by YoutubeDL when a requested entry
1135     is not found in the playlist info_dict
1136     """
1137     msg = 'Entry not found in info'
1138
1139
1140 class SameFileError(YoutubeDLError):
1141     """Same File exception.
1142
1143     This exception will be thrown by FileDownloader objects if they detect
1144     multiple files would have to be downloaded to the same file on disk.
1145     """
1146     msg = 'Fixed output name but more than one file to download'
1147
1148     def __init__(self, filename=None):
1149         if filename is not None:
1150             self.msg += f': {filename}'
1151         super().__init__(self.msg)
1152
1153
1154 class PostProcessingError(YoutubeDLError):
1155     """Post Processing exception.
1156
1157     This exception may be raised by PostProcessor's .run() method to
1158     indicate an error in the postprocessing task.
1159     """
1160
1161
1162 class DownloadCancelled(YoutubeDLError):
1163     """ Exception raised when the download queue should be interrupted """
1164     msg = 'The download was cancelled'
1165
1166
1167 class ExistingVideoReached(DownloadCancelled):
1168     """ --break-on-existing triggered """
1169     msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1170
1171
1172 class RejectedVideoReached(DownloadCancelled):
1173     """ --break-on-reject triggered """
1174     msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1175
1176
1177 class MaxDownloadsReached(DownloadCancelled):
1178     """ --max-downloads limit has been reached. """
1179     msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1180
1181
1182 class ReExtractInfo(YoutubeDLError):
1183     """ Video info needs to be re-extracted. """
1184
1185     def __init__(self, msg, expected=False):
1186         super().__init__(msg)
1187         self.expected = expected
1188
1189
1190 class ThrottledDownload(ReExtractInfo):
1191     """ Download speed below --throttled-rate. """
1192     msg = 'The download speed is below throttle limit'
1193
1194     def __init__(self):
1195         super().__init__(self.msg, expected=False)
1196
1197
1198 class UnavailableVideoError(YoutubeDLError):
1199     """Unavailable Format exception.
1200
1201     This exception will be thrown when a video is requested
1202     in a format that is not available for that video.
1203     """
1204     msg = 'Unable to download video'
1205
1206     def __init__(self, err=None):
1207         if err is not None:
1208             self.msg += f': {err}'
1209         super().__init__(self.msg)
1210
1211
1212 class ContentTooShortError(YoutubeDLError):
1213     """Content Too Short exception.
1214
1215     This exception may be raised by FileDownloader objects when a file they
1216     download is too small for what the server announced first, indicating
1217     the connection was probably interrupted.
1218     """
1219
1220     def __init__(self, downloaded, expected):
1221         super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1222         # Both in bytes
1223         self.downloaded = downloaded
1224         self.expected = expected
1225
1226
1227 class XAttrMetadataError(YoutubeDLError):
1228     def __init__(self, code=None, msg='Unknown error'):
1229         super().__init__(msg)
1230         self.code = code
1231         self.msg = msg
1232
1233         # Parsing code and msg
1234         if (self.code in (errno.ENOSPC, errno.EDQUOT)
1235                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
1236             self.reason = 'NO_SPACE'
1237         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
1238             self.reason = 'VALUE_TOO_LONG'
1239         else:
1240             self.reason = 'NOT_SUPPORTED'
1241
1242
1243 class XAttrUnavailableError(YoutubeDLError):
1244     pass
1245
1246
1247 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1248     hc = http_class(*args, **kwargs)
1249     source_address = ydl_handler._params.get('source_address')
1250
1251     if source_address is not None:
1252         # This is to workaround _create_connection() from socket where it will try all
1253         # address data from getaddrinfo() including IPv6. This filters the result from
1254         # getaddrinfo() based on the source_address value.
1255         # This is based on the cpython socket.create_connection() function.
1256         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1257         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1258             host, port = address
1259             err = None
1260             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1261             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1262             ip_addrs = [addr for addr in addrs if addr[0] == af]
1263             if addrs and not ip_addrs:
1264                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1265                 raise OSError(
1266                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
1267                     % (ip_version, source_address[0]))
1268             for res in ip_addrs:
1269                 af, socktype, proto, canonname, sa = res
1270                 sock = None
1271                 try:
1272                     sock = socket.socket(af, socktype, proto)
1273                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1274                         sock.settimeout(timeout)
1275                     sock.bind(source_address)
1276                     sock.connect(sa)
1277                     err = None  # Explicitly break reference cycle
1278                     return sock
1279                 except OSError as _:
1280                     err = _
1281                     if sock is not None:
1282                         sock.close()
1283             if err is not None:
1284                 raise err
1285             else:
1286                 raise OSError('getaddrinfo returns an empty list')
1287         if hasattr(hc, '_create_connection'):
1288             hc._create_connection = _create_connection
1289         hc.source_address = (source_address, 0)
1290
1291     return hc
1292
1293
1294 def handle_youtubedl_headers(headers):
1295     filtered_headers = headers
1296
1297     if 'Youtubedl-no-compression' in filtered_headers:
1298         filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
1299         del filtered_headers['Youtubedl-no-compression']
1300
1301     return filtered_headers
1302
1303
1304 class YoutubeDLHandler(urllib.request.HTTPHandler):
1305     """Handler for HTTP requests and responses.
1306
1307     This class, when installed with an OpenerDirector, automatically adds
1308     the standard headers to every HTTP request and handles gzipped and
1309     deflated responses from web servers. If compression is to be avoided in
1310     a particular request, the original request in the program code only has
1311     to include the HTTP header "Youtubedl-no-compression", which will be
1312     removed before making the real request.
1313
1314     Part of this code was copied from:
1315
1316     http://techknack.net/python-urllib2-handlers/
1317
1318     Andrew Rowls, the author of that code, agreed to release it to the
1319     public domain.
1320     """
1321
1322     def __init__(self, params, *args, **kwargs):
1323         urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
1324         self._params = params
1325
1326     def http_open(self, req):
1327         conn_class = http.client.HTTPConnection
1328
1329         socks_proxy = req.headers.get('Ytdl-socks-proxy')
1330         if socks_proxy:
1331             conn_class = make_socks_conn_class(conn_class, socks_proxy)
1332             del req.headers['Ytdl-socks-proxy']
1333
1334         return self.do_open(functools.partial(
1335             _create_http_connection, self, conn_class, False),
1336             req)
1337
1338     @staticmethod
1339     def deflate(data):
1340         if not data:
1341             return data
1342         try:
1343             return zlib.decompress(data, -zlib.MAX_WBITS)
1344         except zlib.error:
1345             return zlib.decompress(data)
1346
1347     @staticmethod
1348     def brotli(data):
1349         if not data:
1350             return data
1351         return brotli.decompress(data)
1352
1353     def http_request(self, req):
1354         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1355         # always respected by websites, some tend to give out URLs with non percent-encoded
1356         # non-ASCII characters (see telemb.py, ard.py [#3412])
1357         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1358         # To work around aforementioned issue we will replace request's original URL with
1359         # percent-encoded one
1360         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1361         # the code of this workaround has been moved here from YoutubeDL.urlopen()
1362         url = req.get_full_url()
1363         url_escaped = escape_url(url)
1364
1365         # Substitute URL if any change after escaping
1366         if url != url_escaped:
1367             req = update_Request(req, url=url_escaped)
1368
1369         for h, v in self._params.get('http_headers', std_headers).items():
1370             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
1371             # The dict keys are capitalized because of this bug by urllib
1372             if h.capitalize() not in req.headers:
1373                 req.add_header(h, v)
1374
1375         if 'Accept-encoding' not in req.headers:
1376             req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
1377
1378         req.headers = handle_youtubedl_headers(req.headers)
1379
1380         return super().do_request_(req)
1381
1382     def http_response(self, req, resp):
1383         old_resp = resp
1384         # gzip
1385         if resp.headers.get('Content-encoding', '') == 'gzip':
1386             content = resp.read()
1387             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
1388             try:
1389                 uncompressed = io.BytesIO(gz.read())
1390             except OSError as original_ioerror:
1391                 # There may be junk add the end of the file
1392                 # See http://stackoverflow.com/q/4928560/35070 for details
1393                 for i in range(1, 1024):
1394                     try:
1395                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
1396                         uncompressed = io.BytesIO(gz.read())
1397                     except OSError:
1398                         continue
1399                     break
1400                 else:
1401                     raise original_ioerror
1402             resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
1403             resp.msg = old_resp.msg
1404             del resp.headers['Content-encoding']
1405         # deflate
1406         if resp.headers.get('Content-encoding', '') == 'deflate':
1407             gz = io.BytesIO(self.deflate(resp.read()))
1408             resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
1409             resp.msg = old_resp.msg
1410             del resp.headers['Content-encoding']
1411         # brotli
1412         if resp.headers.get('Content-encoding', '') == 'br':
1413             resp = urllib.request.addinfourl(
1414                 io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
1415             resp.msg = old_resp.msg
1416             del resp.headers['Content-encoding']
1417         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1418         # https://github.com/ytdl-org/youtube-dl/issues/6457).
1419         if 300 <= resp.code < 400:
1420             location = resp.headers.get('Location')
1421             if location:
1422                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
1423                 location = location.encode('iso-8859-1').decode()
1424                 location_escaped = escape_url(location)
1425                 if location != location_escaped:
1426                     del resp.headers['Location']
1427                     resp.headers['Location'] = location_escaped
1428         return resp
1429
1430     https_request = http_request
1431     https_response = http_response
1432
1433
1434 def make_socks_conn_class(base_class, socks_proxy):
1435     assert issubclass(base_class, (
1436         http.client.HTTPConnection, http.client.HTTPSConnection))
1437
1438     url_components = urllib.parse.urlparse(socks_proxy)
1439     if url_components.scheme.lower() == 'socks5':
1440         socks_type = ProxyType.SOCKS5
1441     elif url_components.scheme.lower() in ('socks', 'socks4'):
1442         socks_type = ProxyType.SOCKS4
1443     elif url_components.scheme.lower() == 'socks4a':
1444         socks_type = ProxyType.SOCKS4A
1445
1446     def unquote_if_non_empty(s):
1447         if not s:
1448             return s
1449         return urllib.parse.unquote_plus(s)
1450
1451     proxy_args = (
1452         socks_type,
1453         url_components.hostname, url_components.port or 1080,
1454         True,  # Remote DNS
1455         unquote_if_non_empty(url_components.username),
1456         unquote_if_non_empty(url_components.password),
1457     )
1458
1459     class SocksConnection(base_class):
1460         def connect(self):
1461             self.sock = sockssocket()
1462             self.sock.setproxy(*proxy_args)
1463             if isinstance(self.timeout, (int, float)):
1464                 self.sock.settimeout(self.timeout)
1465             self.sock.connect((self.host, self.port))
1466
1467             if isinstance(self, http.client.HTTPSConnection):
1468                 if hasattr(self, '_context'):  # Python > 2.6
1469                     self.sock = self._context.wrap_socket(
1470                         self.sock, server_hostname=self.host)
1471                 else:
1472                     self.sock = ssl.wrap_socket(self.sock)
1473
1474     return SocksConnection
1475
1476
1477 class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
1478     def __init__(self, params, https_conn_class=None, *args, **kwargs):
1479         urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
1480         self._https_conn_class = https_conn_class or http.client.HTTPSConnection
1481         self._params = params
1482
1483     def https_open(self, req):
1484         kwargs = {}
1485         conn_class = self._https_conn_class
1486
1487         if hasattr(self, '_context'):  # python > 2.6
1488             kwargs['context'] = self._context
1489         if hasattr(self, '_check_hostname'):  # python 3.x
1490             kwargs['check_hostname'] = self._check_hostname
1491
1492         socks_proxy = req.headers.get('Ytdl-socks-proxy')
1493         if socks_proxy:
1494             conn_class = make_socks_conn_class(conn_class, socks_proxy)
1495             del req.headers['Ytdl-socks-proxy']
1496
1497         try:
1498             return self.do_open(
1499                 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
1500         except urllib.error.URLError as e:
1501             if (isinstance(e.reason, ssl.SSLError)
1502                     and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1503                 raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
1504             raise
1505
1506
1507 def is_path_like(f):
1508     return isinstance(f, (str, bytes, os.PathLike))
1509
1510
1511 class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1512     """
1513     See [1] for cookie file format.
1514
1515     1. https://curl.haxx.se/docs/http-cookies.html
1516     """
1517     _HTTPONLY_PREFIX = '#HttpOnly_'
1518     _ENTRY_LEN = 7
1519     _HEADER = '''# Netscape HTTP Cookie File
1520 # This file is generated by yt-dlp.  Do not edit.
1521
1522 '''
1523     _CookieFileEntry = collections.namedtuple(
1524         'CookieFileEntry',
1525         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1526
1527     def __init__(self, filename=None, *args, **kwargs):
1528         super().__init__(None, *args, **kwargs)
1529         if is_path_like(filename):
1530             filename = os.fspath(filename)
1531         self.filename = filename
1532
1533     @staticmethod
1534     def _true_or_false(cndn):
1535         return 'TRUE' if cndn else 'FALSE'
1536
1537     @contextlib.contextmanager
1538     def open(self, file, *, write=False):
1539         if is_path_like(file):
1540             with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1541                 yield f
1542         else:
1543             if write:
1544                 file.truncate(0)
1545             yield file
1546
1547     def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1548         now = time.time()
1549         for cookie in self:
1550             if (not ignore_discard and cookie.discard
1551                     or not ignore_expires and cookie.is_expired(now)):
1552                 continue
1553             name, value = cookie.name, cookie.value
1554             if value is None:
1555                 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1556                 # with no name, whereas http.cookiejar regards it as a
1557                 # cookie with no value.
1558                 name, value = '', name
1559             f.write('%s\n' % '\t'.join((
1560                 cookie.domain,
1561                 self._true_or_false(cookie.domain.startswith('.')),
1562                 cookie.path,
1563                 self._true_or_false(cookie.secure),
1564                 str_or_none(cookie.expires, default=''),
1565                 name, value
1566             )))
1567
1568     def save(self, filename=None, *args, **kwargs):
1569         """
1570         Save cookies to a file.
1571         Code is taken from CPython 3.6
1572         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1573
1574         if filename is None:
1575             if self.filename is not None:
1576                 filename = self.filename
1577             else:
1578                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1579
1580         # Store session cookies with `expires` set to 0 instead of an empty string
1581         for cookie in self:
1582             if cookie.expires is None:
1583                 cookie.expires = 0
1584
1585         with self.open(filename, write=True) as f:
1586             f.write(self._HEADER)
1587             self._really_save(f, *args, **kwargs)
1588
1589     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1590         """Load cookies from a file."""
1591         if filename is None:
1592             if self.filename is not None:
1593                 filename = self.filename
1594             else:
1595                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1596
1597         def prepare_line(line):
1598             if line.startswith(self._HTTPONLY_PREFIX):
1599                 line = line[len(self._HTTPONLY_PREFIX):]
1600             # comments and empty lines are fine
1601             if line.startswith('#') or not line.strip():
1602                 return line
1603             cookie_list = line.split('\t')
1604             if len(cookie_list) != self._ENTRY_LEN:
1605                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1606             cookie = self._CookieFileEntry(*cookie_list)
1607             if cookie.expires_at and not cookie.expires_at.isdigit():
1608                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1609             return line
1610
1611         cf = io.StringIO()
1612         with self.open(filename) as f:
1613             for line in f:
1614                 try:
1615                     cf.write(prepare_line(line))
1616                 except http.cookiejar.LoadError as e:
1617                     if f'{line.strip()} '[0] in '[{"':
1618                         raise http.cookiejar.LoadError(
1619                             'Cookies file must be Netscape formatted, not JSON. See  '
1620                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1621                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1622                     continue
1623         cf.seek(0)
1624         self._really_load(cf, filename, ignore_discard, ignore_expires)
1625         # Session cookies are denoted by either `expires` field set to
1626         # an empty string or 0. MozillaCookieJar only recognizes the former
1627         # (see [1]). So we need force the latter to be recognized as session
1628         # cookies on our own.
1629         # Session cookies may be important for cookies-based authentication,
1630         # e.g. usually, when user does not check 'Remember me' check box while
1631         # logging in on a site, some important cookies are stored as session
1632         # cookies so that not recognizing them will result in failed login.
1633         # 1. https://bugs.python.org/issue17164
1634         for cookie in self:
1635             # Treat `expires=0` cookies as session cookies
1636             if cookie.expires == 0:
1637                 cookie.expires = None
1638                 cookie.discard = True
1639
1640
1641 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
1642     def __init__(self, cookiejar=None):
1643         urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
1644
1645     def http_response(self, request, response):
1646         return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
1647
1648     https_request = urllib.request.HTTPCookieProcessor.http_request
1649     https_response = http_response
1650
1651
1652 class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
1653     """YoutubeDL redirect handler
1654
1655     The code is based on HTTPRedirectHandler implementation from CPython [1].
1656
1657     This redirect handler solves two issues:
1658      - ensures redirect URL is always unicode under python 2
1659      - introduces support for experimental HTTP response status code
1660        308 Permanent Redirect [2] used by some sites [3]
1661
1662     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1663     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1664     3. https://github.com/ytdl-org/youtube-dl/issues/28768
1665     """
1666
1667     http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
1668
1669     def redirect_request(self, req, fp, code, msg, headers, newurl):
1670         """Return a Request or None in response to a redirect.
1671
1672         This is called by the http_error_30x methods when a
1673         redirection response is received.  If a redirection should
1674         take place, return a new Request to allow http_error_30x to
1675         perform the redirect.  Otherwise, raise HTTPError if no-one
1676         else should try to handle this url.  Return None if you can't
1677         but another Handler might.
1678         """
1679         m = req.get_method()
1680         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1681                  or code in (301, 302, 303) and m == "POST")):
1682             raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
1683         # Strictly (according to RFC 2616), 301 or 302 in response to
1684         # a POST MUST NOT cause a redirection without confirmation
1685         # from the user (of urllib.request, in this case).  In practice,
1686         # essentially all clients do redirect in this case, so we do
1687         # the same.
1688
1689         # Be conciliant with URIs containing a space.  This is mainly
1690         # redundant with the more complete encoding done in http_error_302(),
1691         # but it is kept for compatibility with other callers.
1692         newurl = newurl.replace(' ', '%20')
1693
1694         CONTENT_HEADERS = ("content-length", "content-type")
1695         # NB: don't use dict comprehension for python 2.6 compatibility
1696         newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1697
1698         # A 303 must either use GET or HEAD for subsequent request
1699         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1700         if code == 303 and m != 'HEAD':
1701             m = 'GET'
1702         # 301 and 302 redirects are commonly turned into a GET from a POST
1703         # for subsequent requests by browsers, so we'll do the same.
1704         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1705         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1706         if code in (301, 302) and m == 'POST':
1707             m = 'GET'
1708
1709         return urllib.request.Request(
1710             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1711             unverifiable=True, method=m)
1712
1713
1714 def extract_timezone(date_str):
1715     m = re.search(
1716         r'''(?x)
1717             ^.{8,}?                                              # >=8 char non-TZ prefix, if present
1718             (?P<tz>Z|                                            # just the UTC Z, or
1719                 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
1720                    (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1721                    [ ]?                                          # optional space
1722                 (?P<sign>\+|-)                                   # +/-
1723                 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
1724             $)
1725         ''', date_str)
1726     if not m:
1727         m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1728         timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
1729         if timezone is not None:
1730             date_str = date_str[:-len(m.group('tz'))]
1731         timezone = datetime.timedelta(hours=timezone or 0)
1732     else:
1733         date_str = date_str[:-len(m.group('tz'))]
1734         if not m.group('sign'):
1735             timezone = datetime.timedelta()
1736         else:
1737             sign = 1 if m.group('sign') == '+' else -1
1738             timezone = datetime.timedelta(
1739                 hours=sign * int(m.group('hours')),
1740                 minutes=sign * int(m.group('minutes')))
1741     return timezone, date_str
1742
1743
1744 def parse_iso8601(date_str, delimiter='T', timezone=None):
1745     """ Return a UNIX timestamp from the given date """
1746
1747     if date_str is None:
1748         return None
1749
1750     date_str = re.sub(r'\.[0-9]+', '', date_str)
1751
1752     if timezone is None:
1753         timezone, date_str = extract_timezone(date_str)
1754
1755     with contextlib.suppress(ValueError):
1756         date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1757         dt = datetime.datetime.strptime(date_str, date_format) - timezone
1758         return calendar.timegm(dt.timetuple())
1759
1760
1761 def date_formats(day_first=True):
1762     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1763
1764
1765 def unified_strdate(date_str, day_first=True):
1766     """Return a string with the date in the format YYYYMMDD"""
1767
1768     if date_str is None:
1769         return None
1770     upload_date = None
1771     # Replace commas
1772     date_str = date_str.replace(',', ' ')
1773     # Remove AM/PM + timezone
1774     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1775     _, date_str = extract_timezone(date_str)
1776
1777     for expression in date_formats(day_first):
1778         with contextlib.suppress(ValueError):
1779             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1780     if upload_date is None:
1781         timetuple = email.utils.parsedate_tz(date_str)
1782         if timetuple:
1783             with contextlib.suppress(ValueError):
1784                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1785     if upload_date is not None:
1786         return str(upload_date)
1787
1788
1789 def unified_timestamp(date_str, day_first=True):
1790     if date_str is None:
1791         return None
1792
1793     date_str = re.sub(r'\s+', ' ', re.sub(
1794         r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
1795
1796     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1797     timezone, date_str = extract_timezone(date_str)
1798
1799     # Remove AM/PM + timezone
1800     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1801
1802     # Remove unrecognized timezones from ISO 8601 alike timestamps
1803     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1804     if m:
1805         date_str = date_str[:-len(m.group('tz'))]
1806
1807     # Python only supports microseconds, so remove nanoseconds
1808     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1809     if m:
1810         date_str = m.group(1)
1811
1812     for expression in date_formats(day_first):
1813         with contextlib.suppress(ValueError):
1814             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1815             return calendar.timegm(dt.timetuple())
1816
1817     timetuple = email.utils.parsedate_tz(date_str)
1818     if timetuple:
1819         return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
1820
1821
1822 def determine_ext(url, default_ext='unknown_video'):
1823     if url is None or '.' not in url:
1824         return default_ext
1825     guess = url.partition('?')[0].rpartition('.')[2]
1826     if re.match(r'^[A-Za-z0-9]+$', guess):
1827         return guess
1828     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1829     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1830         return guess.rstrip('/')
1831     else:
1832         return default_ext
1833
1834
1835 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1836     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1837
1838
1839 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1840     R"""
1841     Return a datetime object from a string.
1842     Supported format:
1843         (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1844
1845     @param format       strftime format of DATE
1846     @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
1847                         auto: round to the unit provided in date_str (if applicable).
1848     """
1849     auto_precision = False
1850     if precision == 'auto':
1851         auto_precision = True
1852         precision = 'microsecond'
1853     today = datetime_round(datetime.datetime.utcnow(), precision)
1854     if date_str in ('now', 'today'):
1855         return today
1856     if date_str == 'yesterday':
1857         return today - datetime.timedelta(days=1)
1858     match = re.match(
1859         r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1860         date_str)
1861     if match is not None:
1862         start_time = datetime_from_str(match.group('start'), precision, format)
1863         time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1864         unit = match.group('unit')
1865         if unit == 'month' or unit == 'year':
1866             new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1867             unit = 'day'
1868         else:
1869             if unit == 'week':
1870                 unit = 'day'
1871                 time *= 7
1872             delta = datetime.timedelta(**{unit + 's': time})
1873             new_date = start_time + delta
1874         if auto_precision:
1875             return datetime_round(new_date, unit)
1876         return new_date
1877
1878     return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1879
1880
1881 def date_from_str(date_str, format='%Y%m%d', strict=False):
1882     R"""
1883     Return a date object from a string using datetime_from_str
1884
1885     @param strict  Restrict allowed patterns to "YYYYMMDD" and
1886                    (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1887     """
1888     if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1889         raise ValueError(f'Invalid date format "{date_str}"')
1890     return datetime_from_str(date_str, precision='microsecond', format=format).date()
1891
1892
1893 def datetime_add_months(dt, months):
1894     """Increment/Decrement a datetime object by months."""
1895     month = dt.month + months - 1
1896     year = dt.year + month // 12
1897     month = month % 12 + 1
1898     day = min(dt.day, calendar.monthrange(year, month)[1])
1899     return dt.replace(year, month, day)
1900
1901
1902 def datetime_round(dt, precision='day'):
1903     """
1904     Round a datetime object's time to a specific precision
1905     """
1906     if precision == 'microsecond':
1907         return dt
1908
1909     unit_seconds = {
1910         'day': 86400,
1911         'hour': 3600,
1912         'minute': 60,
1913         'second': 1,
1914     }
1915     roundto = lambda x, n: ((x + n / 2) // n) * n
1916     timestamp = calendar.timegm(dt.timetuple())
1917     return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1918
1919
1920 def hyphenate_date(date_str):
1921     """
1922     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1923     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1924     if match is not None:
1925         return '-'.join(match.groups())
1926     else:
1927         return date_str
1928
1929
1930 class DateRange:
1931     """Represents a time interval between two dates"""
1932
1933     def __init__(self, start=None, end=None):
1934         """start and end must be strings in the format accepted by date"""
1935         if start is not None:
1936             self.start = date_from_str(start, strict=True)
1937         else:
1938             self.start = datetime.datetime.min.date()
1939         if end is not None:
1940             self.end = date_from_str(end, strict=True)
1941         else:
1942             self.end = datetime.datetime.max.date()
1943         if self.start > self.end:
1944             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
1945
1946     @classmethod
1947     def day(cls, day):
1948         """Returns a range that only contains the given day"""
1949         return cls(day, day)
1950
1951     def __contains__(self, date):
1952         """Check if the date is in the range"""
1953         if not isinstance(date, datetime.date):
1954             date = date_from_str(date)
1955         return self.start <= date <= self.end
1956
1957     def __str__(self):
1958         return f'{self.start.isoformat()} - {self.end.isoformat()}'
1959
1960     def __eq__(self, other):
1961         return (isinstance(other, DateRange)
1962                 and self.start == other.start and self.end == other.end)
1963
1964
1965 def platform_name():
1966     """ Returns the platform name as a str """
1967     deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
1968     return platform.platform()
1969
1970
1971 @functools.cache
1972 def system_identifier():
1973     python_implementation = platform.python_implementation()
1974     if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
1975         python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
1976     libc_ver = []
1977     with contextlib.suppress(OSError):  # We may not have access to the executable
1978         libc_ver = platform.libc_ver()
1979
1980     return 'Python %s (%s %s) - %s %s' % (
1981         platform.python_version(),
1982         python_implementation,
1983         platform.architecture()[0],
1984         platform.platform(),
1985         format_field(join_nonempty(*libc_ver, delim=' '), None, '(%s)'),
1986     )
1987
1988
1989 @functools.cache
1990 def get_windows_version():
1991     ''' Get Windows version. returns () if it's not running on Windows '''
1992     if compat_os_name == 'nt':
1993         return version_tuple(platform.win32_ver()[1])
1994     else:
1995         return ()
1996
1997
1998 def write_string(s, out=None, encoding=None):
1999     assert isinstance(s, str)
2000     out = out or sys.stderr
2001
2002     if compat_os_name == 'nt' and supports_terminal_sequences(out):
2003         s = re.sub(r'([\r\n]+)', r' \1', s)
2004
2005     enc, buffer = None, out
2006     if 'b' in getattr(out, 'mode', ''):
2007         enc = encoding or preferredencoding()
2008     elif hasattr(out, 'buffer'):
2009         buffer = out.buffer
2010         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
2011
2012     buffer.write(s.encode(enc, 'ignore') if enc else s)
2013     out.flush()
2014
2015
2016 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
2017     from . import _IN_CLI
2018     if _IN_CLI:
2019         if msg in deprecation_warning._cache:
2020             return
2021         deprecation_warning._cache.add(msg)
2022         if printer:
2023             return printer(f'{msg}{bug_reports_message()}', **kwargs)
2024         return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
2025     else:
2026         import warnings
2027         warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
2028
2029
2030 deprecation_warning._cache = set()
2031
2032
2033 def bytes_to_intlist(bs):
2034     if not bs:
2035         return []
2036     if isinstance(bs[0], int):  # Python 3
2037         return list(bs)
2038     else:
2039         return [ord(c) for c in bs]
2040
2041
2042 def intlist_to_bytes(xs):
2043     if not xs:
2044         return b''
2045     return struct.pack('%dB' % len(xs), *xs)
2046
2047
2048 class LockingUnsupportedError(OSError):
2049     msg = 'File locking is not supported'
2050
2051     def __init__(self):
2052         super().__init__(self.msg)
2053
2054
2055 # Cross-platform file locking
2056 if sys.platform == 'win32':
2057     import ctypes
2058     import ctypes.wintypes
2059     import msvcrt
2060
2061     class OVERLAPPED(ctypes.Structure):
2062         _fields_ = [
2063             ('Internal', ctypes.wintypes.LPVOID),
2064             ('InternalHigh', ctypes.wintypes.LPVOID),
2065             ('Offset', ctypes.wintypes.DWORD),
2066             ('OffsetHigh', ctypes.wintypes.DWORD),
2067             ('hEvent', ctypes.wintypes.HANDLE),
2068         ]
2069
2070     kernel32 = ctypes.windll.kernel32
2071     LockFileEx = kernel32.LockFileEx
2072     LockFileEx.argtypes = [
2073         ctypes.wintypes.HANDLE,     # hFile
2074         ctypes.wintypes.DWORD,      # dwFlags
2075         ctypes.wintypes.DWORD,      # dwReserved
2076         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
2077         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
2078         ctypes.POINTER(OVERLAPPED)  # Overlapped
2079     ]
2080     LockFileEx.restype = ctypes.wintypes.BOOL
2081     UnlockFileEx = kernel32.UnlockFileEx
2082     UnlockFileEx.argtypes = [
2083         ctypes.wintypes.HANDLE,     # hFile
2084         ctypes.wintypes.DWORD,      # dwReserved
2085         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
2086         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
2087         ctypes.POINTER(OVERLAPPED)  # Overlapped
2088     ]
2089     UnlockFileEx.restype = ctypes.wintypes.BOOL
2090     whole_low = 0xffffffff
2091     whole_high = 0x7fffffff
2092
2093     def _lock_file(f, exclusive, block):
2094         overlapped = OVERLAPPED()
2095         overlapped.Offset = 0
2096         overlapped.OffsetHigh = 0
2097         overlapped.hEvent = 0
2098         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2099
2100         if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2101                           (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2102                           0, whole_low, whole_high, f._lock_file_overlapped_p):
2103             # NB: No argument form of "ctypes.FormatError" does not work on PyPy
2104             raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2105
2106     def _unlock_file(f):
2107         assert f._lock_file_overlapped_p
2108         handle = msvcrt.get_osfhandle(f.fileno())
2109         if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2110             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2111
2112 else:
2113     try:
2114         import fcntl
2115
2116         def _lock_file(f, exclusive, block):
2117             flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2118             if not block:
2119                 flags |= fcntl.LOCK_NB
2120             try:
2121                 fcntl.flock(f, flags)
2122             except BlockingIOError:
2123                 raise
2124             except OSError:  # AOSP does not have flock()
2125                 fcntl.lockf(f, flags)
2126
2127         def _unlock_file(f):
2128             try:
2129                 fcntl.flock(f, fcntl.LOCK_UN)
2130             except OSError:
2131                 fcntl.lockf(f, fcntl.LOCK_UN)
2132
2133     except ImportError:
2134
2135         def _lock_file(f, exclusive, block):
2136             raise LockingUnsupportedError()
2137
2138         def _unlock_file(f):
2139             raise LockingUnsupportedError()
2140
2141
2142 class locked_file:
2143     locked = False
2144
2145     def __init__(self, filename, mode, block=True, encoding=None):
2146         if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2147             raise NotImplementedError(mode)
2148         self.mode, self.block = mode, block
2149
2150         writable = any(f in mode for f in 'wax+')
2151         readable = any(f in mode for f in 'r+')
2152         flags = functools.reduce(operator.ior, (
2153             getattr(os, 'O_CLOEXEC', 0),  # UNIX only
2154             getattr(os, 'O_BINARY', 0),  # Windows only
2155             getattr(os, 'O_NOINHERIT', 0),  # Windows only
2156             os.O_CREAT if writable else 0,  # O_TRUNC only after locking
2157             os.O_APPEND if 'a' in mode else 0,
2158             os.O_EXCL if 'x' in mode else 0,
2159             os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2160         ))
2161
2162         self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2163
2164     def __enter__(self):
2165         exclusive = 'r' not in self.mode
2166         try:
2167             _lock_file(self.f, exclusive, self.block)
2168             self.locked = True
2169         except OSError:
2170             self.f.close()
2171             raise
2172         if 'w' in self.mode:
2173             try:
2174                 self.f.truncate()
2175             except OSError as e:
2176                 if e.errno not in (
2177                     errno.ESPIPE,  # Illegal seek - expected for FIFO
2178                     errno.EINVAL,  # Invalid argument - expected for /dev/null
2179                 ):
2180                     raise
2181         return self
2182
2183     def unlock(self):
2184         if not self.locked:
2185             return
2186         try:
2187             _unlock_file(self.f)
2188         finally:
2189             self.locked = False
2190
2191     def __exit__(self, *_):
2192         try:
2193             self.unlock()
2194         finally:
2195             self.f.close()
2196
2197     open = __enter__
2198     close = __exit__
2199
2200     def __getattr__(self, attr):
2201         return getattr(self.f, attr)
2202
2203     def __iter__(self):
2204         return iter(self.f)
2205
2206
2207 @functools.cache
2208 def get_filesystem_encoding():
2209     encoding = sys.getfilesystemencoding()
2210     return encoding if encoding is not None else 'utf-8'
2211
2212
2213 def shell_quote(args):
2214     quoted_args = []
2215     encoding = get_filesystem_encoding()
2216     for a in args:
2217         if isinstance(a, bytes):
2218             # We may get a filename encoded with 'encodeFilename'
2219             a = a.decode(encoding)
2220         quoted_args.append(compat_shlex_quote(a))
2221     return ' '.join(quoted_args)
2222
2223
2224 def smuggle_url(url, data):
2225     """ Pass additional data in a URL for internal use. """
2226
2227     url, idata = unsmuggle_url(url, {})
2228     data.update(idata)
2229     sdata = urllib.parse.urlencode(
2230         {'__youtubedl_smuggle': json.dumps(data)})
2231     return url + '#' + sdata
2232
2233
2234 def unsmuggle_url(smug_url, default=None):
2235     if '#__youtubedl_smuggle' not in smug_url:
2236         return smug_url, default
2237     url, _, sdata = smug_url.rpartition('#')
2238     jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2239     data = json.loads(jsond)
2240     return url, data
2241
2242
2243 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2244     """ Formats numbers with decimal sufixes like K, M, etc """
2245     num, factor = float_or_none(num), float(factor)
2246     if num is None or num < 0:
2247         return None
2248     POSSIBLE_SUFFIXES = 'kMGTPEZY'
2249     exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2250     suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2251     if factor == 1024:
2252         suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2253     converted = num / (factor ** exponent)
2254     return fmt % (converted, suffix)
2255
2256
2257 def format_bytes(bytes):
2258     return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2259
2260
2261 def lookup_unit_table(unit_table, s):
2262     units_re = '|'.join(re.escape(u) for u in unit_table)
2263     m = re.match(
2264         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2265     if not m:
2266         return None
2267     num_str = m.group('num').replace(',', '.')
2268     mult = unit_table[m.group('unit')]
2269     return int(float(num_str) * mult)
2270
2271
2272 def parse_filesize(s):
2273     if s is None:
2274         return None
2275
2276     # The lower-case forms are of course incorrect and unofficial,
2277     # but we support those too
2278     _UNIT_TABLE = {
2279         'B': 1,
2280         'b': 1,
2281         'bytes': 1,
2282         'KiB': 1024,
2283         'KB': 1000,
2284         'kB': 1024,
2285         'Kb': 1000,
2286         'kb': 1000,
2287         'kilobytes': 1000,
2288         'kibibytes': 1024,
2289         'MiB': 1024 ** 2,
2290         'MB': 1000 ** 2,
2291         'mB': 1024 ** 2,
2292         'Mb': 1000 ** 2,
2293         'mb': 1000 ** 2,
2294         'megabytes': 1000 ** 2,
2295         'mebibytes': 1024 ** 2,
2296         'GiB': 1024 ** 3,
2297         'GB': 1000 ** 3,
2298         'gB': 1024 ** 3,
2299         'Gb': 1000 ** 3,
2300         'gb': 1000 ** 3,
2301         'gigabytes': 1000 ** 3,
2302         'gibibytes': 1024 ** 3,
2303         'TiB': 1024 ** 4,
2304         'TB': 1000 ** 4,
2305         'tB': 1024 ** 4,
2306         'Tb': 1000 ** 4,
2307         'tb': 1000 ** 4,
2308         'terabytes': 1000 ** 4,
2309         'tebibytes': 1024 ** 4,
2310         'PiB': 1024 ** 5,
2311         'PB': 1000 ** 5,
2312         'pB': 1024 ** 5,
2313         'Pb': 1000 ** 5,
2314         'pb': 1000 ** 5,
2315         'petabytes': 1000 ** 5,
2316         'pebibytes': 1024 ** 5,
2317         'EiB': 1024 ** 6,
2318         'EB': 1000 ** 6,
2319         'eB': 1024 ** 6,
2320         'Eb': 1000 ** 6,
2321         'eb': 1000 ** 6,
2322         'exabytes': 1000 ** 6,
2323         'exbibytes': 1024 ** 6,
2324         'ZiB': 1024 ** 7,
2325         'ZB': 1000 ** 7,
2326         'zB': 1024 ** 7,
2327         'Zb': 1000 ** 7,
2328         'zb': 1000 ** 7,
2329         'zettabytes': 1000 ** 7,
2330         'zebibytes': 1024 ** 7,
2331         'YiB': 1024 ** 8,
2332         'YB': 1000 ** 8,
2333         'yB': 1024 ** 8,
2334         'Yb': 1000 ** 8,
2335         'yb': 1000 ** 8,
2336         'yottabytes': 1000 ** 8,
2337         'yobibytes': 1024 ** 8,
2338     }
2339
2340     return lookup_unit_table(_UNIT_TABLE, s)
2341
2342
2343 def parse_count(s):
2344     if s is None:
2345         return None
2346
2347     s = re.sub(r'^[^\d]+\s', '', s).strip()
2348
2349     if re.match(r'^[\d,.]+$', s):
2350         return str_to_int(s)
2351
2352     _UNIT_TABLE = {
2353         'k': 1000,
2354         'K': 1000,
2355         'm': 1000 ** 2,
2356         'M': 1000 ** 2,
2357         'kk': 1000 ** 2,
2358         'KK': 1000 ** 2,
2359         'b': 1000 ** 3,
2360         'B': 1000 ** 3,
2361     }
2362
2363     ret = lookup_unit_table(_UNIT_TABLE, s)
2364     if ret is not None:
2365         return ret
2366
2367     mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2368     if mobj:
2369         return str_to_int(mobj.group(1))
2370
2371
2372 def parse_resolution(s, *, lenient=False):
2373     if s is None:
2374         return {}
2375
2376     if lenient:
2377         mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2378     else:
2379         mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2380     if mobj:
2381         return {
2382             'width': int(mobj.group('w')),
2383             'height': int(mobj.group('h')),
2384         }
2385
2386     mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2387     if mobj:
2388         return {'height': int(mobj.group(1))}
2389
2390     mobj = re.search(r'\b([48])[kK]\b', s)
2391     if mobj:
2392         return {'height': int(mobj.group(1)) * 540}
2393
2394     return {}
2395
2396
2397 def parse_bitrate(s):
2398     if not isinstance(s, str):
2399         return
2400     mobj = re.search(r'\b(\d+)\s*kbps', s)
2401     if mobj:
2402         return int(mobj.group(1))
2403
2404
2405 def month_by_name(name, lang='en'):
2406     """ Return the number of a month by (locale-independently) English name """
2407
2408     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2409
2410     try:
2411         return month_names.index(name) + 1
2412     except ValueError:
2413         return None
2414
2415
2416 def month_by_abbreviation(abbrev):
2417     """ Return the number of a month by (locale-independently) English
2418         abbreviations """
2419
2420     try:
2421         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2422     except ValueError:
2423         return None
2424
2425
2426 def fix_xml_ampersands(xml_str):
2427     """Replace all the '&' by '&amp;' in XML"""
2428     return re.sub(
2429         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2430         '&amp;',
2431         xml_str)
2432
2433
2434 def setproctitle(title):
2435     assert isinstance(title, str)
2436
2437     # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2438     try:
2439         import ctypes
2440     except ImportError:
2441         return
2442
2443     try:
2444         libc = ctypes.cdll.LoadLibrary('libc.so.6')
2445     except OSError:
2446         return
2447     except TypeError:
2448         # LoadLibrary in Windows Python 2.7.13 only expects
2449         # a bytestring, but since unicode_literals turns
2450         # every string into a unicode string, it fails.
2451         return
2452     title_bytes = title.encode()
2453     buf = ctypes.create_string_buffer(len(title_bytes))
2454     buf.value = title_bytes
2455     try:
2456         libc.prctl(15, buf, 0, 0, 0)
2457     except AttributeError:
2458         return  # Strange libc, just skip this
2459
2460
2461 def remove_start(s, start):
2462     return s[len(start):] if s is not None and s.startswith(start) else s
2463
2464
2465 def remove_end(s, end):
2466     return s[:-len(end)] if s is not None and s.endswith(end) else s
2467
2468
2469 def remove_quotes(s):
2470     if s is None or len(s) < 2:
2471         return s
2472     for quote in ('"', "'", ):
2473         if s[0] == quote and s[-1] == quote:
2474             return s[1:-1]
2475     return s
2476
2477
2478 def get_domain(url):
2479     """
2480     This implementation is inconsistent, but is kept for compatibility.
2481     Use this only for "webpage_url_domain"
2482     """
2483     return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2484
2485
2486 def url_basename(url):
2487     path = urllib.parse.urlparse(url).path
2488     return path.strip('/').split('/')[-1]
2489
2490
2491 def base_url(url):
2492     return re.match(r'https?://[^?#]+/', url).group()
2493
2494
2495 def urljoin(base, path):
2496     if isinstance(path, bytes):
2497         path = path.decode()
2498     if not isinstance(path, str) or not path:
2499         return None
2500     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2501         return path
2502     if isinstance(base, bytes):
2503         base = base.decode()
2504     if not isinstance(base, str) or not re.match(
2505             r'^(?:https?:)?//', base):
2506         return None
2507     return urllib.parse.urljoin(base, path)
2508
2509
2510 class HEADRequest(urllib.request.Request):
2511     def get_method(self):
2512         return 'HEAD'
2513
2514
2515 class PUTRequest(urllib.request.Request):
2516     def get_method(self):
2517         return 'PUT'
2518
2519
2520 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2521     if get_attr and v is not None:
2522         v = getattr(v, get_attr, None)
2523     try:
2524         return int(v) * invscale // scale
2525     except (ValueError, TypeError, OverflowError):
2526         return default
2527
2528
2529 def str_or_none(v, default=None):
2530     return default if v is None else str(v)
2531
2532
2533 def str_to_int(int_str):
2534     """ A more relaxed version of int_or_none """
2535     if isinstance(int_str, int):
2536         return int_str
2537     elif isinstance(int_str, str):
2538         int_str = re.sub(r'[,\.\+]', '', int_str)
2539         return int_or_none(int_str)
2540
2541
2542 def float_or_none(v, scale=1, invscale=1, default=None):
2543     if v is None:
2544         return default
2545     try:
2546         return float(v) * invscale / scale
2547     except (ValueError, TypeError):
2548         return default
2549
2550
2551 def bool_or_none(v, default=None):
2552     return v if isinstance(v, bool) else default
2553
2554
2555 def strip_or_none(v, default=None):
2556     return v.strip() if isinstance(v, str) else default
2557
2558
2559 def url_or_none(url):
2560     if not url or not isinstance(url, str):
2561         return None
2562     url = url.strip()
2563     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2564
2565
2566 def request_to_url(req):
2567     if isinstance(req, urllib.request.Request):
2568         return req.get_full_url()
2569     else:
2570         return req
2571
2572
2573 def strftime_or_none(timestamp, date_format, default=None):
2574     datetime_object = None
2575     try:
2576         if isinstance(timestamp, (int, float)):  # unix timestamp
2577             # Using naive datetime here can break timestamp() in Windows
2578             # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
2579             datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
2580         elif isinstance(timestamp, str):  # assume YYYYMMDD
2581             datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2582         date_format = re.sub(  # Support %s on windows
2583             r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
2584         return datetime_object.strftime(date_format)
2585     except (ValueError, TypeError, AttributeError):
2586         return default
2587
2588
2589 def parse_duration(s):
2590     if not isinstance(s, str):
2591         return None
2592     s = s.strip()
2593     if not s:
2594         return None
2595
2596     days, hours, mins, secs, ms = [None] * 5
2597     m = re.match(r'''(?x)
2598             (?P<before_secs>
2599                 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2600             (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2601             (?P<ms>[.:][0-9]+)?Z?$
2602         ''', s)
2603     if m:
2604         days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2605     else:
2606         m = re.match(
2607             r'''(?ix)(?:P?
2608                 (?:
2609                     [0-9]+\s*y(?:ears?)?,?\s*
2610                 )?
2611                 (?:
2612                     [0-9]+\s*m(?:onths?)?,?\s*
2613                 )?
2614                 (?:
2615                     [0-9]+\s*w(?:eeks?)?,?\s*
2616                 )?
2617                 (?:
2618                     (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2619                 )?
2620                 T)?
2621                 (?:
2622                     (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2623                 )?
2624                 (?:
2625                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2626                 )?
2627                 (?:
2628                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2629                 )?Z?$''', s)
2630         if m:
2631             days, hours, mins, secs, ms = m.groups()
2632         else:
2633             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2634             if m:
2635                 hours, mins = m.groups()
2636             else:
2637                 return None
2638
2639     if ms:
2640         ms = ms.replace(':', '.')
2641     return sum(float(part or 0) * mult for part, mult in (
2642         (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2643
2644
2645 def prepend_extension(filename, ext, expected_real_ext=None):
2646     name, real_ext = os.path.splitext(filename)
2647     return (
2648         f'{name}.{ext}{real_ext}'
2649         if not expected_real_ext or real_ext[1:] == expected_real_ext
2650         else f'{filename}.{ext}')
2651
2652
2653 def replace_extension(filename, ext, expected_real_ext=None):
2654     name, real_ext = os.path.splitext(filename)
2655     return '{}.{}'.format(
2656         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2657         ext)
2658
2659
2660 def check_executable(exe, args=[]):
2661     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2662     args can be a list of arguments for a short output (like -version) """
2663     try:
2664         Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2665     except OSError:
2666         return False
2667     return exe
2668
2669
2670 def _get_exe_version_output(exe, args, *, to_screen=None):
2671     if to_screen:
2672         to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2673     try:
2674         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2675         # SIGTTOU if yt-dlp is run in the background.
2676         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2677         stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2678                                  stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2679     except OSError:
2680         return False
2681     return stdout
2682
2683
2684 def detect_exe_version(output, version_re=None, unrecognized='present'):
2685     assert isinstance(output, str)
2686     if version_re is None:
2687         version_re = r'version\s+([-0-9._a-zA-Z]+)'
2688     m = re.search(version_re, output)
2689     if m:
2690         return m.group(1)
2691     else:
2692         return unrecognized
2693
2694
2695 def get_exe_version(exe, args=['--version'],
2696                     version_re=None, unrecognized='present'):
2697     """ Returns the version of the specified executable,
2698     or False if the executable is not present """
2699     out = _get_exe_version_output(exe, args)
2700     return detect_exe_version(out, version_re, unrecognized) if out else False
2701
2702
2703 def frange(start=0, stop=None, step=1):
2704     """Float range"""
2705     if stop is None:
2706         start, stop = 0, start
2707     sign = [-1, 1][step > 0] if step else 0
2708     while sign * start < sign * stop:
2709         yield start
2710         start += step
2711
2712
2713 class LazyList(collections.abc.Sequence):
2714     """Lazy immutable list from an iterable
2715     Note that slices of a LazyList are lists and not LazyList"""
2716
2717     class IndexError(IndexError):
2718         pass
2719
2720     def __init__(self, iterable, *, reverse=False, _cache=None):
2721         self._iterable = iter(iterable)
2722         self._cache = [] if _cache is None else _cache
2723         self._reversed = reverse
2724
2725     def __iter__(self):
2726         if self._reversed:
2727             # We need to consume the entire iterable to iterate in reverse
2728             yield from self.exhaust()
2729             return
2730         yield from self._cache
2731         for item in self._iterable:
2732             self._cache.append(item)
2733             yield item
2734
2735     def _exhaust(self):
2736         self._cache.extend(self._iterable)
2737         self._iterable = []  # Discard the emptied iterable to make it pickle-able
2738         return self._cache
2739
2740     def exhaust(self):
2741         """Evaluate the entire iterable"""
2742         return self._exhaust()[::-1 if self._reversed else 1]
2743
2744     @staticmethod
2745     def _reverse_index(x):
2746         return None if x is None else ~x
2747
2748     def __getitem__(self, idx):
2749         if isinstance(idx, slice):
2750             if self._reversed:
2751                 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2752             start, stop, step = idx.start, idx.stop, idx.step or 1
2753         elif isinstance(idx, int):
2754             if self._reversed:
2755                 idx = self._reverse_index(idx)
2756             start, stop, step = idx, idx, 0
2757         else:
2758             raise TypeError('indices must be integers or slices')
2759         if ((start or 0) < 0 or (stop or 0) < 0
2760                 or (start is None and step < 0)
2761                 or (stop is None and step > 0)):
2762             # We need to consume the entire iterable to be able to slice from the end
2763             # Obviously, never use this with infinite iterables
2764             self._exhaust()
2765             try:
2766                 return self._cache[idx]
2767             except IndexError as e:
2768                 raise self.IndexError(e) from e
2769         n = max(start or 0, stop or 0) - len(self._cache) + 1
2770         if n > 0:
2771             self._cache.extend(itertools.islice(self._iterable, n))
2772         try:
2773             return self._cache[idx]
2774         except IndexError as e:
2775             raise self.IndexError(e) from e
2776
2777     def __bool__(self):
2778         try:
2779             self[-1] if self._reversed else self[0]
2780         except self.IndexError:
2781             return False
2782         return True
2783
2784     def __len__(self):
2785         self._exhaust()
2786         return len(self._cache)
2787
2788     def __reversed__(self):
2789         return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2790
2791     def __copy__(self):
2792         return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2793
2794     def __repr__(self):
2795         # repr and str should mimic a list. So we exhaust the iterable
2796         return repr(self.exhaust())
2797
2798     def __str__(self):
2799         return repr(self.exhaust())
2800
2801
2802 class PagedList:
2803
2804     class IndexError(IndexError):
2805         pass
2806
2807     def __len__(self):
2808         # This is only useful for tests
2809         return len(self.getslice())
2810
2811     def __init__(self, pagefunc, pagesize, use_cache=True):
2812         self._pagefunc = pagefunc
2813         self._pagesize = pagesize
2814         self._pagecount = float('inf')
2815         self._use_cache = use_cache
2816         self._cache = {}
2817
2818     def getpage(self, pagenum):
2819         page_results = self._cache.get(pagenum)
2820         if page_results is None:
2821             page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2822         if self._use_cache:
2823             self._cache[pagenum] = page_results
2824         return page_results
2825
2826     def getslice(self, start=0, end=None):
2827         return list(self._getslice(start, end))
2828
2829     def _getslice(self, start, end):
2830         raise NotImplementedError('This method must be implemented by subclasses')
2831
2832     def __getitem__(self, idx):
2833         assert self._use_cache, 'Indexing PagedList requires cache'
2834         if not isinstance(idx, int) or idx < 0:
2835             raise TypeError('indices must be non-negative integers')
2836         entries = self.getslice(idx, idx + 1)
2837         if not entries:
2838             raise self.IndexError()
2839         return entries[0]
2840
2841
2842 class OnDemandPagedList(PagedList):
2843     """Download pages until a page with less than maximum results"""
2844
2845     def _getslice(self, start, end):
2846         for pagenum in itertools.count(start // self._pagesize):
2847             firstid = pagenum * self._pagesize
2848             nextfirstid = pagenum * self._pagesize + self._pagesize
2849             if start >= nextfirstid:
2850                 continue
2851
2852             startv = (
2853                 start % self._pagesize
2854                 if firstid <= start < nextfirstid
2855                 else 0)
2856             endv = (
2857                 ((end - 1) % self._pagesize) + 1
2858                 if (end is not None and firstid <= end <= nextfirstid)
2859                 else None)
2860
2861             try:
2862                 page_results = self.getpage(pagenum)
2863             except Exception:
2864                 self._pagecount = pagenum - 1
2865                 raise
2866             if startv != 0 or endv is not None:
2867                 page_results = page_results[startv:endv]
2868             yield from page_results
2869
2870             # A little optimization - if current page is not "full", ie. does
2871             # not contain page_size videos then we can assume that this page
2872             # is the last one - there are no more ids on further pages -
2873             # i.e. no need to query again.
2874             if len(page_results) + startv < self._pagesize:
2875                 break
2876
2877             # If we got the whole page, but the next page is not interesting,
2878             # break out early as well
2879             if end == nextfirstid:
2880                 break
2881
2882
2883 class InAdvancePagedList(PagedList):
2884     """PagedList with total number of pages known in advance"""
2885
2886     def __init__(self, pagefunc, pagecount, pagesize):
2887         PagedList.__init__(self, pagefunc, pagesize, True)
2888         self._pagecount = pagecount
2889
2890     def _getslice(self, start, end):
2891         start_page = start // self._pagesize
2892         end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2893         skip_elems = start - start_page * self._pagesize
2894         only_more = None if end is None else end - start
2895         for pagenum in range(start_page, end_page):
2896             page_results = self.getpage(pagenum)
2897             if skip_elems:
2898                 page_results = page_results[skip_elems:]
2899                 skip_elems = None
2900             if only_more is not None:
2901                 if len(page_results) < only_more:
2902                     only_more -= len(page_results)
2903                 else:
2904                     yield from page_results[:only_more]
2905                     break
2906             yield from page_results
2907
2908
2909 class PlaylistEntries:
2910     MissingEntry = object()
2911     is_exhausted = False
2912
2913     def __init__(self, ydl, info_dict):
2914         self.ydl = ydl
2915
2916         # _entries must be assigned now since infodict can change during iteration
2917         entries = info_dict.get('entries')
2918         if entries is None:
2919             raise EntryNotInPlaylist('There are no entries')
2920         elif isinstance(entries, list):
2921             self.is_exhausted = True
2922
2923         requested_entries = info_dict.get('requested_entries')
2924         self.is_incomplete = bool(requested_entries)
2925         if self.is_incomplete:
2926             assert self.is_exhausted
2927             self._entries = [self.MissingEntry] * max(requested_entries)
2928             for i, entry in zip(requested_entries, entries):
2929                 self._entries[i - 1] = entry
2930         elif isinstance(entries, (list, PagedList, LazyList)):
2931             self._entries = entries
2932         else:
2933             self._entries = LazyList(entries)
2934
2935     PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2936         (?P<start>[+-]?\d+)?
2937         (?P<range>[:-]
2938             (?P<end>[+-]?\d+|inf(?:inite)?)?
2939             (?::(?P<step>[+-]?\d+))?
2940         )?''')
2941
2942     @classmethod
2943     def parse_playlist_items(cls, string):
2944         for segment in string.split(','):
2945             if not segment:
2946                 raise ValueError('There is two or more consecutive commas')
2947             mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2948             if not mobj:
2949                 raise ValueError(f'{segment!r} is not a valid specification')
2950             start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2951             if int_or_none(step) == 0:
2952                 raise ValueError(f'Step in {segment!r} cannot be zero')
2953             yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2954
2955     def get_requested_items(self):
2956         playlist_items = self.ydl.params.get('playlist_items')
2957         playlist_start = self.ydl.params.get('playliststart', 1)
2958         playlist_end = self.ydl.params.get('playlistend')
2959         # For backwards compatibility, interpret -1 as whole list
2960         if playlist_end in (-1, None):
2961             playlist_end = ''
2962         if not playlist_items:
2963             playlist_items = f'{playlist_start}:{playlist_end}'
2964         elif playlist_start != 1 or playlist_end:
2965             self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2966
2967         for index in self.parse_playlist_items(playlist_items):
2968             for i, entry in self[index]:
2969                 yield i, entry
2970                 if not entry:
2971                     continue
2972                 try:
2973                     # TODO: Add auto-generated fields
2974                     self.ydl._match_entry(entry, incomplete=True, silent=True)
2975                 except (ExistingVideoReached, RejectedVideoReached):
2976                     return
2977
2978     def get_full_count(self):
2979         if self.is_exhausted and not self.is_incomplete:
2980             return len(self)
2981         elif isinstance(self._entries, InAdvancePagedList):
2982             if self._entries._pagesize == 1:
2983                 return self._entries._pagecount
2984
2985     @functools.cached_property
2986     def _getter(self):
2987         if isinstance(self._entries, list):
2988             def get_entry(i):
2989                 try:
2990                     entry = self._entries[i]
2991                 except IndexError:
2992                     entry = self.MissingEntry
2993                     if not self.is_incomplete:
2994                         raise self.IndexError()
2995                 if entry is self.MissingEntry:
2996                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2997                 return entry
2998         else:
2999             def get_entry(i):
3000                 try:
3001                     return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
3002                 except (LazyList.IndexError, PagedList.IndexError):
3003                     raise self.IndexError()
3004         return get_entry
3005
3006     def __getitem__(self, idx):
3007         if isinstance(idx, int):
3008             idx = slice(idx, idx)
3009
3010         # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3011         step = 1 if idx.step is None else idx.step
3012         if idx.start is None:
3013             start = 0 if step > 0 else len(self) - 1
3014         else:
3015             start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
3016
3017         # NB: Do not call len(self) when idx == [:]
3018         if idx.stop is None:
3019             stop = 0 if step < 0 else float('inf')
3020         else:
3021             stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
3022         stop += [-1, 1][step > 0]
3023
3024         for i in frange(start, stop, step):
3025             if i < 0:
3026                 continue
3027             try:
3028                 entry = self._getter(i)
3029             except self.IndexError:
3030                 self.is_exhausted = True
3031                 if step > 0:
3032                     break
3033                 continue
3034             yield i + 1, entry
3035
3036     def __len__(self):
3037         return len(tuple(self[:]))
3038
3039     class IndexError(IndexError):
3040         pass
3041
3042
3043 def uppercase_escape(s):
3044     unicode_escape = codecs.getdecoder('unicode_escape')
3045     return re.sub(
3046         r'\\U[0-9a-fA-F]{8}',
3047         lambda m: unicode_escape(m.group(0))[0],
3048         s)
3049
3050
3051 def lowercase_escape(s):
3052     unicode_escape = codecs.getdecoder('unicode_escape')
3053     return re.sub(
3054         r'\\u[0-9a-fA-F]{4}',
3055         lambda m: unicode_escape(m.group(0))[0],
3056         s)
3057
3058
3059 def escape_rfc3986(s):
3060     """Escape non-ASCII characters as suggested by RFC 3986"""
3061     return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3062
3063
3064 def escape_url(url):
3065     """Escape URL as suggested by RFC 3986"""
3066     url_parsed = urllib.parse.urlparse(url)
3067     return url_parsed._replace(
3068         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3069         path=escape_rfc3986(url_parsed.path),
3070         params=escape_rfc3986(url_parsed.params),
3071         query=escape_rfc3986(url_parsed.query),
3072         fragment=escape_rfc3986(url_parsed.fragment)
3073     ).geturl()
3074
3075
3076 def parse_qs(url):
3077     return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
3078
3079
3080 def read_batch_urls(batch_fd):
3081     def fixup(url):
3082         if not isinstance(url, str):
3083             url = url.decode('utf-8', 'replace')
3084         BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3085         for bom in BOM_UTF8:
3086             if url.startswith(bom):
3087                 url = url[len(bom):]
3088         url = url.lstrip()
3089         if not url or url.startswith(('#', ';', ']')):
3090             return False
3091         # "#" cannot be stripped out since it is part of the URI
3092         # However, it can be safely stripped out if following a whitespace
3093         return re.split(r'\s#', url, 1)[0].rstrip()
3094
3095     with contextlib.closing(batch_fd) as fd:
3096         return [url for url in map(fixup, fd) if url]
3097
3098
3099 def urlencode_postdata(*args, **kargs):
3100     return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3101
3102
3103 def update_url_query(url, query):
3104     if not query:
3105         return url
3106     parsed_url = urllib.parse.urlparse(url)
3107     qs = urllib.parse.parse_qs(parsed_url.query)
3108     qs.update(query)
3109     return urllib.parse.urlunparse(parsed_url._replace(
3110         query=urllib.parse.urlencode(qs, True)))
3111
3112
3113 def update_Request(req, url=None, data=None, headers=None, query=None):
3114     req_headers = req.headers.copy()
3115     req_headers.update(headers or {})
3116     req_data = data or req.data
3117     req_url = update_url_query(url or req.get_full_url(), query)
3118     req_get_method = req.get_method()
3119     if req_get_method == 'HEAD':
3120         req_type = HEADRequest
3121     elif req_get_method == 'PUT':
3122         req_type = PUTRequest
3123     else:
3124         req_type = urllib.request.Request
3125     new_req = req_type(
3126         req_url, data=req_data, headers=req_headers,
3127         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3128     if hasattr(req, 'timeout'):
3129         new_req.timeout = req.timeout
3130     return new_req
3131
3132
3133 def _multipart_encode_impl(data, boundary):
3134     content_type = 'multipart/form-data; boundary=%s' % boundary
3135
3136     out = b''
3137     for k, v in data.items():
3138         out += b'--' + boundary.encode('ascii') + b'\r\n'
3139         if isinstance(k, str):
3140             k = k.encode()
3141         if isinstance(v, str):
3142             v = v.encode()
3143         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3144         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3145         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3146         if boundary.encode('ascii') in content:
3147             raise ValueError('Boundary overlaps with data')
3148         out += content
3149
3150     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3151
3152     return out, content_type
3153
3154
3155 def multipart_encode(data, boundary=None):
3156     '''
3157     Encode a dict to RFC 7578-compliant form-data
3158
3159     data:
3160         A dict where keys and values can be either Unicode or bytes-like
3161         objects.
3162     boundary:
3163         If specified a Unicode object, it's used as the boundary. Otherwise
3164         a random boundary is generated.
3165
3166     Reference: https://tools.ietf.org/html/rfc7578
3167     '''
3168     has_specified_boundary = boundary is not None
3169
3170     while True:
3171         if boundary is None:
3172             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3173
3174         try:
3175             out, content_type = _multipart_encode_impl(data, boundary)
3176             break
3177         except ValueError:
3178             if has_specified_boundary:
3179                 raise
3180             boundary = None
3181
3182     return out, content_type
3183
3184
3185 def variadic(x, allowed_types=(str, bytes, dict)):
3186     return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
3187
3188
3189 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3190     for val in map(d.get, variadic(key_or_keys)):
3191         if val is not None and (val or not skip_false_values):
3192             return val
3193     return default
3194
3195
3196 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3197     for f in funcs:
3198         try:
3199             val = f(*args, **kwargs)
3200         except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3201             pass
3202         else:
3203             if expected_type is None or isinstance(val, expected_type):
3204                 return val
3205
3206
3207 def try_get(src, getter, expected_type=None):
3208     return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3209
3210
3211 def filter_dict(dct, cndn=lambda _, v: v is not None):
3212     return {k: v for k, v in dct.items() if cndn(k, v)}
3213
3214
3215 def merge_dicts(*dicts):
3216     merged = {}
3217     for a_dict in dicts:
3218         for k, v in a_dict.items():
3219             if (v is not None and k not in merged
3220                     or isinstance(v, str) and merged[k] == ''):
3221                 merged[k] = v
3222     return merged
3223
3224
3225 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3226     return string if isinstance(string, str) else str(string, encoding, errors)
3227
3228
3229 US_RATINGS = {
3230     'G': 0,
3231     'PG': 10,
3232     'PG-13': 13,
3233     'R': 16,
3234     'NC': 18,
3235 }
3236
3237
3238 TV_PARENTAL_GUIDELINES = {
3239     'TV-Y': 0,
3240     'TV-Y7': 7,
3241     'TV-G': 0,
3242     'TV-PG': 0,
3243     'TV-14': 14,
3244     'TV-MA': 17,
3245 }
3246
3247
3248 def parse_age_limit(s):
3249     # isinstance(False, int) is True. So type() must be used instead
3250     if type(s) is int:  # noqa: E721
3251         return s if 0 <= s <= 21 else None
3252     elif not isinstance(s, str):
3253         return None
3254     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3255     if m:
3256         return int(m.group('age'))
3257     s = s.upper()
3258     if s in US_RATINGS:
3259         return US_RATINGS[s]
3260     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3261     if m:
3262         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3263     return None
3264
3265
3266 def strip_jsonp(code):
3267     return re.sub(
3268         r'''(?sx)^
3269             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3270             (?:\s*&&\s*(?P=func_name))?
3271             \s*\(\s*(?P<callback_data>.*)\);?
3272             \s*?(?://[^\n]*)*$''',
3273         r'\g<callback_data>', code)
3274
3275
3276 def js_to_json(code, vars={}, *, strict=False):
3277     # vars is a dict of var, val pairs to substitute
3278     STRING_QUOTES = '\'"'
3279     STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
3280     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3281     SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3282     INTEGER_TABLE = (
3283         (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3284         (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3285     )
3286
3287     def process_escape(match):
3288         JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
3289         escape = match.group(1) or match.group(2)
3290
3291         return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
3292                 else R'\u00' if escape == 'x'
3293                 else '' if escape == '\n'
3294                 else escape)
3295
3296     def fix_kv(m):
3297         v = m.group(0)
3298         if v in ('true', 'false', 'null'):
3299             return v
3300         elif v in ('undefined', 'void 0'):
3301             return 'null'
3302         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3303             return ''
3304
3305         if v[0] in STRING_QUOTES:
3306             escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
3307             return f'"{escaped}"'
3308
3309         for regex, base in INTEGER_TABLE:
3310             im = re.match(regex, v)
3311             if im:
3312                 i = int(im.group(1), base)
3313                 return f'"{i}":' if v.endswith(':') else str(i)
3314
3315         if v in vars:
3316             return json.dumps(vars[v])
3317
3318         if not strict:
3319             return f'"{v}"'
3320
3321         raise ValueError(f'Unknown value: {v}')
3322
3323     def create_map(mobj):
3324         return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3325
3326     code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3327     if not strict:
3328         code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3329         code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
3330
3331     return re.sub(rf'''(?sx)
3332         {STRING_RE}|
3333         {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
3334         void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3335         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
3336         [0-9]+(?={SKIP_RE}:)|
3337         !+
3338         ''', fix_kv, code)
3339
3340
3341 def qualities(quality_ids):
3342     """ Get a numeric quality value out of a list of possible values """
3343     def q(qid):
3344         try:
3345             return quality_ids.index(qid)
3346         except ValueError:
3347             return -1
3348     return q
3349
3350
3351 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3352
3353
3354 DEFAULT_OUTTMPL = {
3355     'default': '%(title)s [%(id)s].%(ext)s',
3356     'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3357 }
3358 OUTTMPL_TYPES = {
3359     'chapter': None,
3360     'subtitle': None,
3361     'thumbnail': None,
3362     'description': 'description',
3363     'annotation': 'annotations.xml',
3364     'infojson': 'info.json',
3365     'link': None,
3366     'pl_video': None,
3367     'pl_thumbnail': None,
3368     'pl_description': 'description',
3369     'pl_infojson': 'info.json',
3370 }
3371
3372 # As of [1] format syntax is:
3373 #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3374 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3375 STR_FORMAT_RE_TMPL = r'''(?x)
3376     (?<!%)(?P<prefix>(?:%%)*)
3377     %
3378     (?P<has_key>\((?P<key>{0})\))?
3379     (?P<format>
3380         (?P<conversion>[#0\-+ ]+)?
3381         (?P<min_width>\d+)?
3382         (?P<precision>\.\d+)?
3383         (?P<len_mod>[hlL])?  # unused in python
3384         {1}  # conversion type
3385     )
3386 '''
3387
3388
3389 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3390
3391
3392 def limit_length(s, length):
3393     """ Add ellipses to overly long strings """
3394     if s is None:
3395         return None
3396     ELLIPSES = '...'
3397     if len(s) > length:
3398         return s[:length - len(ELLIPSES)] + ELLIPSES
3399     return s
3400
3401
3402 def version_tuple(v):
3403     return tuple(int(e) for e in re.split(r'[-.]', v))
3404
3405
3406 def is_outdated_version(version, limit, assume_new=True):
3407     if not version:
3408         return not assume_new
3409     try:
3410         return version_tuple(version) < version_tuple(limit)
3411     except ValueError:
3412         return not assume_new
3413
3414
3415 def ytdl_is_updateable():
3416     """ Returns if yt-dlp can be updated with -U """
3417
3418     from .update import is_non_updateable
3419
3420     return not is_non_updateable()
3421
3422
3423 def args_to_str(args):
3424     # Get a short string representation for a subprocess command
3425     return ' '.join(compat_shlex_quote(a) for a in args)
3426
3427
3428 def error_to_compat_str(err):
3429     return str(err)
3430
3431
3432 def error_to_str(err):
3433     return f'{type(err).__name__}: {err}'
3434
3435
3436 def mimetype2ext(mt):
3437     if mt is None:
3438         return None
3439
3440     mt, _, params = mt.partition(';')
3441     mt = mt.strip()
3442
3443     FULL_MAP = {
3444         'audio/mp4': 'm4a',
3445         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
3446         # it's the most popular one
3447         'audio/mpeg': 'mp3',
3448         'audio/x-wav': 'wav',
3449         'audio/wav': 'wav',
3450         'audio/wave': 'wav',
3451     }
3452
3453     ext = FULL_MAP.get(mt)
3454     if ext is not None:
3455         return ext
3456
3457     SUBTYPE_MAP = {
3458         '3gpp': '3gp',
3459         'smptett+xml': 'tt',
3460         'ttaf+xml': 'dfxp',
3461         'ttml+xml': 'ttml',
3462         'x-flv': 'flv',
3463         'x-mp4-fragmented': 'mp4',
3464         'x-ms-sami': 'sami',
3465         'x-ms-wmv': 'wmv',
3466         'mpegurl': 'm3u8',
3467         'x-mpegurl': 'm3u8',
3468         'vnd.apple.mpegurl': 'm3u8',
3469         'dash+xml': 'mpd',
3470         'f4m+xml': 'f4m',
3471         'hds+xml': 'f4m',
3472         'vnd.ms-sstr+xml': 'ism',
3473         'quicktime': 'mov',
3474         'mp2t': 'ts',
3475         'x-wav': 'wav',
3476         'filmstrip+json': 'fs',
3477         'svg+xml': 'svg',
3478     }
3479
3480     _, _, subtype = mt.rpartition('/')
3481     ext = SUBTYPE_MAP.get(subtype.lower())
3482     if ext is not None:
3483         return ext
3484
3485     SUFFIX_MAP = {
3486         'json': 'json',
3487         'xml': 'xml',
3488         'zip': 'zip',
3489         'gzip': 'gz',
3490     }
3491
3492     _, _, suffix = subtype.partition('+')
3493     ext = SUFFIX_MAP.get(suffix)
3494     if ext is not None:
3495         return ext
3496
3497     return subtype.replace('+', '.')
3498
3499
3500 def ext2mimetype(ext_or_url):
3501     if not ext_or_url:
3502         return None
3503     if '.' not in ext_or_url:
3504         ext_or_url = f'file.{ext_or_url}'
3505     return mimetypes.guess_type(ext_or_url)[0]
3506
3507
3508 def parse_codecs(codecs_str):
3509     # http://tools.ietf.org/html/rfc6381
3510     if not codecs_str:
3511         return {}
3512     split_codecs = list(filter(None, map(
3513         str.strip, codecs_str.strip().strip(',').split(','))))
3514     vcodec, acodec, scodec, hdr = None, None, None, None
3515     for full_codec in split_codecs:
3516         parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3517         if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3518                         'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3519             if vcodec:
3520                 continue
3521             vcodec = full_codec
3522             if parts[0] in ('dvh1', 'dvhe'):
3523                 hdr = 'DV'
3524             elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
3525                 hdr = 'HDR10'
3526             elif parts[:2] == ['vp9', '2']:
3527                 hdr = 'HDR10'
3528         elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
3529                           'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3530             acodec = acodec or full_codec
3531         elif parts[0] in ('stpp', 'wvtt'):
3532             scodec = scodec or full_codec
3533         else:
3534             write_string(f'WARNING: Unknown codec {full_codec}\n')
3535     if vcodec or acodec or scodec:
3536         return {
3537             'vcodec': vcodec or 'none',
3538             'acodec': acodec or 'none',
3539             'dynamic_range': hdr,
3540             **({'scodec': scodec} if scodec is not None else {}),
3541         }
3542     elif len(split_codecs) == 2:
3543         return {
3544             'vcodec': split_codecs[0],
3545             'acodec': split_codecs[1],
3546         }
3547     return {}
3548
3549
3550 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3551     assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3552
3553     allow_mkv = not preferences or 'mkv' in preferences
3554
3555     if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3556         return 'mkv'  # TODO: any other format allows this?
3557
3558     # TODO: All codecs supported by parse_codecs isn't handled here
3559     COMPATIBLE_CODECS = {
3560         'mp4': {
3561             'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
3562             'h264', 'aacl', 'ec-3',  # Set in ISM
3563         },
3564         'webm': {
3565             'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3566             'vp9x', 'vp8x',  # in the webm spec
3567         },
3568     }
3569
3570     sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3571     vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3572
3573     for ext in preferences or COMPATIBLE_CODECS.keys():
3574         codec_set = COMPATIBLE_CODECS.get(ext, set())
3575         if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3576             return ext
3577
3578     COMPATIBLE_EXTS = (
3579         {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3580         {'webm'},
3581     )
3582     for ext in preferences or vexts:
3583         current_exts = {ext, *vexts, *aexts}
3584         if ext == 'mkv' or current_exts == {ext} or any(
3585                 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3586             return ext
3587     return 'mkv' if allow_mkv else preferences[-1]
3588
3589
3590 def urlhandle_detect_ext(url_handle):
3591     getheader = url_handle.headers.get
3592
3593     cd = getheader('Content-Disposition')
3594     if cd:
3595         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3596         if m:
3597             e = determine_ext(m.group('filename'), default_ext=None)
3598             if e:
3599                 return e
3600
3601     return mimetype2ext(getheader('Content-Type'))
3602
3603
3604 def encode_data_uri(data, mime_type):
3605     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3606
3607
3608 def age_restricted(content_limit, age_limit):
3609     """ Returns True iff the content should be blocked """
3610
3611     if age_limit is None:  # No limit set
3612         return False
3613     if content_limit is None:
3614         return False  # Content available for everyone
3615     return age_limit < content_limit
3616
3617
3618 # List of known byte-order-marks (BOM)
3619 BOMS = [
3620     (b'\xef\xbb\xbf', 'utf-8'),
3621     (b'\x00\x00\xfe\xff', 'utf-32-be'),
3622     (b'\xff\xfe\x00\x00', 'utf-32-le'),
3623     (b'\xff\xfe', 'utf-16-le'),
3624     (b'\xfe\xff', 'utf-16-be'),
3625 ]
3626
3627
3628 def is_html(first_bytes):
3629     """ Detect whether a file contains HTML by examining its first bytes. """
3630
3631     encoding = 'utf-8'
3632     for bom, enc in BOMS:
3633         while first_bytes.startswith(bom):
3634             encoding, first_bytes = enc, first_bytes[len(bom):]
3635
3636     return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3637
3638
3639 def determine_protocol(info_dict):
3640     protocol = info_dict.get('protocol')
3641     if protocol is not None:
3642         return protocol
3643
3644     url = sanitize_url(info_dict['url'])
3645     if url.startswith('rtmp'):
3646         return 'rtmp'
3647     elif url.startswith('mms'):
3648         return 'mms'
3649     elif url.startswith('rtsp'):
3650         return 'rtsp'
3651
3652     ext = determine_ext(url)
3653     if ext == 'm3u8':
3654         return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
3655     elif ext == 'f4m':
3656         return 'f4m'
3657
3658     return urllib.parse.urlparse(url).scheme
3659
3660
3661 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3662     """ Render a list of rows, each as a list of values.
3663     Text after a \t will be right aligned """
3664     def width(string):
3665         return len(remove_terminal_sequences(string).replace('\t', ''))
3666
3667     def get_max_lens(table):
3668         return [max(width(str(v)) for v in col) for col in zip(*table)]
3669
3670     def filter_using_list(row, filterArray):
3671         return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3672
3673     max_lens = get_max_lens(data) if hide_empty else []
3674     header_row = filter_using_list(header_row, max_lens)
3675     data = [filter_using_list(row, max_lens) for row in data]
3676
3677     table = [header_row] + data
3678     max_lens = get_max_lens(table)
3679     extra_gap += 1
3680     if delim:
3681         table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3682         table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
3683     for row in table:
3684         for pos, text in enumerate(map(str, row)):
3685             if '\t' in text:
3686                 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3687             else:
3688                 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3689     ret = '\n'.join(''.join(row).rstrip() for row in table)
3690     return ret
3691
3692
3693 def _match_one(filter_part, dct, incomplete):
3694     # TODO: Generalize code with YoutubeDL._build_format_filter
3695     STRING_OPERATORS = {
3696         '*=': operator.contains,
3697         '^=': lambda attr, value: attr.startswith(value),
3698         '$=': lambda attr, value: attr.endswith(value),
3699         '~=': lambda attr, value: re.search(value, attr),
3700     }
3701     COMPARISON_OPERATORS = {
3702         **STRING_OPERATORS,
3703         '<=': operator.le,  # "<=" must be defined above "<"
3704         '<': operator.lt,
3705         '>=': operator.ge,
3706         '>': operator.gt,
3707         '=': operator.eq,
3708     }
3709
3710     if isinstance(incomplete, bool):
3711         is_incomplete = lambda _: incomplete
3712     else:
3713         is_incomplete = lambda k: k in incomplete
3714
3715     operator_rex = re.compile(r'''(?x)
3716         (?P<key>[a-z_]+)
3717         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3718         (?:
3719             (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3720             (?P<strval>.+?)
3721         )
3722         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3723     m = operator_rex.fullmatch(filter_part.strip())
3724     if m:
3725         m = m.groupdict()
3726         unnegated_op = COMPARISON_OPERATORS[m['op']]
3727         if m['negation']:
3728             op = lambda attr, value: not unnegated_op(attr, value)
3729         else:
3730             op = unnegated_op
3731         comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3732         if m['quote']:
3733             comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3734         actual_value = dct.get(m['key'])
3735         numeric_comparison = None
3736         if isinstance(actual_value, (int, float)):
3737             # If the original field is a string and matching comparisonvalue is
3738             # a number we should respect the origin of the original field
3739             # and process comparison value as a string (see
3740             # https://github.com/ytdl-org/youtube-dl/issues/11082)
3741             try:
3742                 numeric_comparison = int(comparison_value)
3743             except ValueError:
3744                 numeric_comparison = parse_filesize(comparison_value)
3745                 if numeric_comparison is None:
3746                     numeric_comparison = parse_filesize(f'{comparison_value}B')
3747                 if numeric_comparison is None:
3748                     numeric_comparison = parse_duration(comparison_value)
3749         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3750             raise ValueError('Operator %s only supports string values!' % m['op'])
3751         if actual_value is None:
3752             return is_incomplete(m['key']) or m['none_inclusive']
3753         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3754
3755     UNARY_OPERATORS = {
3756         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3757         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3758     }
3759     operator_rex = re.compile(r'''(?x)
3760         (?P<op>%s)\s*(?P<key>[a-z_]+)
3761         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3762     m = operator_rex.fullmatch(filter_part.strip())
3763     if m:
3764         op = UNARY_OPERATORS[m.group('op')]
3765         actual_value = dct.get(m.group('key'))
3766         if is_incomplete(m.group('key')) and actual_value is None:
3767             return True
3768         return op(actual_value)
3769
3770     raise ValueError('Invalid filter part %r' % filter_part)
3771
3772
3773 def match_str(filter_str, dct, incomplete=False):
3774     """ Filter a dictionary with a simple string syntax.
3775     @returns           Whether the filter passes
3776     @param incomplete  Set of keys that is expected to be missing from dct.
3777                        Can be True/False to indicate all/none of the keys may be missing.
3778                        All conditions on incomplete keys pass if the key is missing
3779     """
3780     return all(
3781         _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3782         for filter_part in re.split(r'(?<!\\)&', filter_str))
3783
3784
3785 def match_filter_func(filters):
3786     if not filters:
3787         return None
3788     filters = set(variadic(filters))
3789
3790     interactive = '-' in filters
3791     if interactive:
3792         filters.remove('-')
3793
3794     def _match_func(info_dict, incomplete=False):
3795         if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3796             return NO_DEFAULT if interactive and not incomplete else None
3797         else:
3798             video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3799             filter_str = ') | ('.join(map(str.strip, filters))
3800             return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3801     return _match_func
3802
3803
3804 class download_range_func:
3805     def __init__(self, chapters, ranges):
3806         self.chapters, self.ranges = chapters, ranges
3807
3808     def __call__(self, info_dict, ydl):
3809         if not self.ranges and not self.chapters:
3810             yield {}
3811
3812         warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3813                    else 'Cannot match chapters since chapter information is unavailable')
3814         for regex in self.chapters or []:
3815             for i, chapter in enumerate(info_dict.get('chapters') or []):
3816                 if re.search(regex, chapter['title']):
3817                     warning = None
3818                     yield {**chapter, 'index': i}
3819         if self.chapters and warning:
3820             ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3821
3822         yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3823
3824     def __eq__(self, other):
3825         return (isinstance(other, download_range_func)
3826                 and self.chapters == other.chapters and self.ranges == other.ranges)
3827
3828
3829 def parse_dfxp_time_expr(time_expr):
3830     if not time_expr:
3831         return
3832
3833     mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3834     if mobj:
3835         return float(mobj.group('time_offset'))
3836
3837     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3838     if mobj:
3839         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3840
3841
3842 def srt_subtitles_timecode(seconds):
3843     return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3844
3845
3846 def ass_subtitles_timecode(seconds):
3847     time = timetuple_from_msec(seconds * 1000)
3848     return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3849
3850
3851 def dfxp2srt(dfxp_data):
3852     '''
3853     @param dfxp_data A bytes-like object containing DFXP data
3854     @returns A unicode object containing converted SRT data
3855     '''
3856     LEGACY_NAMESPACES = (
3857         (b'http://www.w3.org/ns/ttml', [
3858             b'http://www.w3.org/2004/11/ttaf1',
3859             b'http://www.w3.org/2006/04/ttaf1',
3860             b'http://www.w3.org/2006/10/ttaf1',
3861         ]),
3862         (b'http://www.w3.org/ns/ttml#styling', [
3863             b'http://www.w3.org/ns/ttml#style',
3864         ]),
3865     )
3866
3867     SUPPORTED_STYLING = [
3868         'color',
3869         'fontFamily',
3870         'fontSize',
3871         'fontStyle',
3872         'fontWeight',
3873         'textDecoration'
3874     ]
3875
3876     _x = functools.partial(xpath_with_ns, ns_map={
3877         'xml': 'http://www.w3.org/XML/1998/namespace',
3878         'ttml': 'http://www.w3.org/ns/ttml',
3879         'tts': 'http://www.w3.org/ns/ttml#styling',
3880     })
3881
3882     styles = {}
3883     default_style = {}
3884
3885     class TTMLPElementParser:
3886         _out = ''
3887         _unclosed_elements = []
3888         _applied_styles = []
3889
3890         def start(self, tag, attrib):
3891             if tag in (_x('ttml:br'), 'br'):
3892                 self._out += '\n'
3893             else:
3894                 unclosed_elements = []
3895                 style = {}
3896                 element_style_id = attrib.get('style')
3897                 if default_style:
3898                     style.update(default_style)
3899                 if element_style_id:
3900                     style.update(styles.get(element_style_id, {}))
3901                 for prop in SUPPORTED_STYLING:
3902                     prop_val = attrib.get(_x('tts:' + prop))
3903                     if prop_val:
3904                         style[prop] = prop_val
3905                 if style:
3906                     font = ''
3907                     for k, v in sorted(style.items()):
3908                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
3909                             continue
3910                         if k == 'color':
3911                             font += ' color="%s"' % v
3912                         elif k == 'fontSize':
3913                             font += ' size="%s"' % v
3914                         elif k == 'fontFamily':
3915                             font += ' face="%s"' % v
3916                         elif k == 'fontWeight' and v == 'bold':
3917                             self._out += '<b>'
3918                             unclosed_elements.append('b')
3919                         elif k == 'fontStyle' and v == 'italic':
3920                             self._out += '<i>'
3921                             unclosed_elements.append('i')
3922                         elif k == 'textDecoration' and v == 'underline':
3923                             self._out += '<u>'
3924                             unclosed_elements.append('u')
3925                     if font:
3926                         self._out += '<font' + font + '>'
3927                         unclosed_elements.append('font')
3928                     applied_style = {}
3929                     if self._applied_styles:
3930                         applied_style.update(self._applied_styles[-1])
3931                     applied_style.update(style)
3932                     self._applied_styles.append(applied_style)
3933                 self._unclosed_elements.append(unclosed_elements)
3934
3935         def end(self, tag):
3936             if tag not in (_x('ttml:br'), 'br'):
3937                 unclosed_elements = self._unclosed_elements.pop()
3938                 for element in reversed(unclosed_elements):
3939                     self._out += '</%s>' % element
3940                 if unclosed_elements and self._applied_styles:
3941                     self._applied_styles.pop()
3942
3943         def data(self, data):
3944             self._out += data
3945
3946         def close(self):
3947             return self._out.strip()
3948
3949     def parse_node(node):
3950         target = TTMLPElementParser()
3951         parser = xml.etree.ElementTree.XMLParser(target=target)
3952         parser.feed(xml.etree.ElementTree.tostring(node))
3953         return parser.close()
3954
3955     for k, v in LEGACY_NAMESPACES:
3956         for ns in v:
3957             dfxp_data = dfxp_data.replace(ns, k)
3958
3959     dfxp = compat_etree_fromstring(dfxp_data)
3960     out = []
3961     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3962
3963     if not paras:
3964         raise ValueError('Invalid dfxp/TTML subtitle')
3965
3966     repeat = False
3967     while True:
3968         for style in dfxp.findall(_x('.//ttml:style')):
3969             style_id = style.get('id') or style.get(_x('xml:id'))
3970             if not style_id:
3971                 continue
3972             parent_style_id = style.get('style')
3973             if parent_style_id:
3974                 if parent_style_id not in styles:
3975                     repeat = True
3976                     continue
3977                 styles[style_id] = styles[parent_style_id].copy()
3978             for prop in SUPPORTED_STYLING:
3979                 prop_val = style.get(_x('tts:' + prop))
3980                 if prop_val:
3981                     styles.setdefault(style_id, {})[prop] = prop_val
3982         if repeat:
3983             repeat = False
3984         else:
3985             break
3986
3987     for p in ('body', 'div'):
3988         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3989         if ele is None:
3990             continue
3991         style = styles.get(ele.get('style'))
3992         if not style:
3993             continue
3994         default_style.update(style)
3995
3996     for para, index in zip(paras, itertools.count(1)):
3997         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3998         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3999         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4000         if begin_time is None:
4001             continue
4002         if not end_time:
4003             if not dur:
4004                 continue
4005             end_time = begin_time + dur
4006         out.append('%d\n%s --> %s\n%s\n\n' % (
4007             index,
4008             srt_subtitles_timecode(begin_time),
4009             srt_subtitles_timecode(end_time),
4010             parse_node(para)))
4011
4012     return ''.join(out)
4013
4014
4015 def cli_option(params, command_option, param, separator=None):
4016     param = params.get(param)
4017     return ([] if param is None
4018             else [command_option, str(param)] if separator is None
4019             else [f'{command_option}{separator}{param}'])
4020
4021
4022 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4023     param = params.get(param)
4024     assert param in (True, False, None)
4025     return cli_option({True: true_value, False: false_value}, command_option, param, separator)
4026
4027
4028 def cli_valueless_option(params, command_option, param, expected_value=True):
4029     return [command_option] if params.get(param) == expected_value else []
4030
4031
4032 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4033     if isinstance(argdict, (list, tuple)):  # for backward compatibility
4034         if use_compat:
4035             return argdict
4036         else:
4037             argdict = None
4038     if argdict is None:
4039         return default
4040     assert isinstance(argdict, dict)
4041
4042     assert isinstance(keys, (list, tuple))
4043     for key_list in keys:
4044         arg_list = list(filter(
4045             lambda x: x is not None,
4046             [argdict.get(key.lower()) for key in variadic(key_list)]))
4047         if arg_list:
4048             return [arg for args in arg_list for arg in args]
4049     return default
4050
4051
4052 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
4053     main_key, exe = main_key.lower(), exe.lower()
4054     root_key = exe if main_key == exe else f'{main_key}+{exe}'
4055     keys = [f'{root_key}{k}' for k in (keys or [''])]
4056     if root_key in keys:
4057         if main_key != exe:
4058             keys.append((main_key, exe))
4059         keys.append('default')
4060     else:
4061         use_compat = False
4062     return cli_configuration_args(argdict, keys, default, use_compat)
4063
4064
4065 class ISO639Utils:
4066     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4067     _lang_map = {
4068         'aa': 'aar',
4069         'ab': 'abk',
4070         'ae': 'ave',
4071         'af': 'afr',
4072         'ak': 'aka',
4073         'am': 'amh',
4074         'an': 'arg',
4075         'ar': 'ara',
4076         'as': 'asm',
4077         'av': 'ava',
4078         'ay': 'aym',
4079         'az': 'aze',
4080         'ba': 'bak',
4081         'be': 'bel',
4082         'bg': 'bul',
4083         'bh': 'bih',
4084         'bi': 'bis',
4085         'bm': 'bam',
4086         'bn': 'ben',
4087         'bo': 'bod',
4088         'br': 'bre',
4089         'bs': 'bos',
4090         'ca': 'cat',
4091         'ce': 'che',
4092         'ch': 'cha',
4093         'co': 'cos',
4094         'cr': 'cre',
4095         'cs': 'ces',
4096         'cu': 'chu',
4097         'cv': 'chv',
4098         'cy': 'cym',
4099         'da': 'dan',
4100         'de': 'deu',
4101         'dv': 'div',
4102         'dz': 'dzo',
4103         'ee': 'ewe',
4104         'el': 'ell',
4105         'en': 'eng',
4106         'eo': 'epo',
4107         'es': 'spa',
4108         'et': 'est',
4109         'eu': 'eus',
4110         'fa': 'fas',
4111         'ff': 'ful',
4112         'fi': 'fin',
4113         'fj': 'fij',
4114         'fo': 'fao',
4115         'fr': 'fra',
4116         'fy': 'fry',
4117         'ga': 'gle',
4118         'gd': 'gla',
4119         'gl': 'glg',
4120         'gn': 'grn',
4121         'gu': 'guj',
4122         'gv': 'glv',
4123         'ha': 'hau',
4124         'he': 'heb',
4125         'iw': 'heb',  # Replaced by he in 1989 revision
4126         'hi': 'hin',
4127         'ho': 'hmo',
4128         'hr': 'hrv',
4129         'ht': 'hat',
4130         'hu': 'hun',
4131         'hy': 'hye',
4132         'hz': 'her',
4133         'ia': 'ina',
4134         'id': 'ind',
4135         'in': 'ind',  # Replaced by id in 1989 revision
4136         'ie': 'ile',
4137         'ig': 'ibo',
4138         'ii': 'iii',
4139         'ik': 'ipk',
4140         'io': 'ido',
4141         'is': 'isl',
4142         'it': 'ita',
4143         'iu': 'iku',
4144         'ja': 'jpn',
4145         'jv': 'jav',
4146         'ka': 'kat',
4147         'kg': 'kon',
4148         'ki': 'kik',
4149         'kj': 'kua',
4150         'kk': 'kaz',
4151         'kl': 'kal',
4152         'km': 'khm',
4153         'kn': 'kan',
4154         'ko': 'kor',
4155         'kr': 'kau',
4156         'ks': 'kas',
4157         'ku': 'kur',
4158         'kv': 'kom',
4159         'kw': 'cor',
4160         'ky': 'kir',
4161         'la': 'lat',
4162         'lb': 'ltz',
4163         'lg': 'lug',
4164         'li': 'lim',
4165         'ln': 'lin',
4166         'lo': 'lao',
4167         'lt': 'lit',
4168         'lu': 'lub',
4169         'lv': 'lav',
4170         'mg': 'mlg',
4171         'mh': 'mah',
4172         'mi': 'mri',
4173         'mk': 'mkd',
4174         'ml': 'mal',
4175         'mn': 'mon',
4176         'mr': 'mar',
4177         'ms': 'msa',
4178         'mt': 'mlt',
4179         'my': 'mya',
4180         'na': 'nau',
4181         'nb': 'nob',
4182         'nd': 'nde',
4183         'ne': 'nep',
4184         'ng': 'ndo',
4185         'nl': 'nld',
4186         'nn': 'nno',
4187         'no': 'nor',
4188         'nr': 'nbl',
4189         'nv': 'nav',
4190         'ny': 'nya',
4191         'oc': 'oci',
4192         'oj': 'oji',
4193         'om': 'orm',
4194         'or': 'ori',
4195         'os': 'oss',
4196         'pa': 'pan',
4197         'pi': 'pli',
4198         'pl': 'pol',
4199         'ps': 'pus',
4200         'pt': 'por',
4201         'qu': 'que',
4202         'rm': 'roh',
4203         'rn': 'run',
4204         'ro': 'ron',
4205         'ru': 'rus',
4206         'rw': 'kin',
4207         'sa': 'san',
4208         'sc': 'srd',
4209         'sd': 'snd',
4210         'se': 'sme',
4211         'sg': 'sag',
4212         'si': 'sin',
4213         'sk': 'slk',
4214         'sl': 'slv',
4215         'sm': 'smo',
4216         'sn': 'sna',
4217         'so': 'som',
4218         'sq': 'sqi',
4219         'sr': 'srp',
4220         'ss': 'ssw',
4221         'st': 'sot',
4222         'su': 'sun',
4223         'sv': 'swe',
4224         'sw': 'swa',
4225         'ta': 'tam',
4226         'te': 'tel',
4227         'tg': 'tgk',
4228         'th': 'tha',
4229         'ti': 'tir',
4230         'tk': 'tuk',
4231         'tl': 'tgl',
4232         'tn': 'tsn',
4233         'to': 'ton',
4234         'tr': 'tur',
4235         'ts': 'tso',
4236         'tt': 'tat',
4237         'tw': 'twi',
4238         'ty': 'tah',
4239         'ug': 'uig',
4240         'uk': 'ukr',
4241         'ur': 'urd',
4242         'uz': 'uzb',
4243         've': 'ven',
4244         'vi': 'vie',
4245         'vo': 'vol',
4246         'wa': 'wln',
4247         'wo': 'wol',
4248         'xh': 'xho',
4249         'yi': 'yid',
4250         'ji': 'yid',  # Replaced by yi in 1989 revision
4251         'yo': 'yor',
4252         'za': 'zha',
4253         'zh': 'zho',
4254         'zu': 'zul',
4255     }
4256
4257     @classmethod
4258     def short2long(cls, code):
4259         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4260         return cls._lang_map.get(code[:2])
4261
4262     @classmethod
4263     def long2short(cls, code):
4264         """Convert language code from ISO 639-2/T to ISO 639-1"""
4265         for short_name, long_name in cls._lang_map.items():
4266             if long_name == code:
4267                 return short_name
4268
4269
4270 class ISO3166Utils:
4271     # From http://data.okfn.org/data/core/country-list
4272     _country_map = {
4273         'AF': 'Afghanistan',
4274         'AX': 'Åland Islands',
4275         'AL': 'Albania',
4276         'DZ': 'Algeria',
4277         'AS': 'American Samoa',
4278         'AD': 'Andorra',
4279         'AO': 'Angola',
4280         'AI': 'Anguilla',
4281         'AQ': 'Antarctica',
4282         'AG': 'Antigua and Barbuda',
4283         'AR': 'Argentina',
4284         'AM': 'Armenia',
4285         'AW': 'Aruba',
4286         'AU': 'Australia',
4287         'AT': 'Austria',
4288         'AZ': 'Azerbaijan',
4289         'BS': 'Bahamas',
4290         'BH': 'Bahrain',
4291         'BD': 'Bangladesh',
4292         'BB': 'Barbados',
4293         'BY': 'Belarus',
4294         'BE': 'Belgium',
4295         'BZ': 'Belize',
4296         'BJ': 'Benin',
4297         'BM': 'Bermuda',
4298         'BT': 'Bhutan',
4299         'BO': 'Bolivia, Plurinational State of',
4300         'BQ': 'Bonaire, Sint Eustatius and Saba',
4301         'BA': 'Bosnia and Herzegovina',
4302         'BW': 'Botswana',
4303         'BV': 'Bouvet Island',
4304         'BR': 'Brazil',
4305         'IO': 'British Indian Ocean Territory',
4306         'BN': 'Brunei Darussalam',
4307         'BG': 'Bulgaria',
4308         'BF': 'Burkina Faso',
4309         'BI': 'Burundi',
4310         'KH': 'Cambodia',
4311         'CM': 'Cameroon',
4312         'CA': 'Canada',
4313         'CV': 'Cape Verde',
4314         'KY': 'Cayman Islands',
4315         'CF': 'Central African Republic',
4316         'TD': 'Chad',
4317         'CL': 'Chile',
4318         'CN': 'China',
4319         'CX': 'Christmas Island',
4320         'CC': 'Cocos (Keeling) Islands',
4321         'CO': 'Colombia',
4322         'KM': 'Comoros',
4323         'CG': 'Congo',
4324         'CD': 'Congo, the Democratic Republic of the',
4325         'CK': 'Cook Islands',
4326         'CR': 'Costa Rica',
4327         'CI': 'Côte d\'Ivoire',
4328         'HR': 'Croatia',
4329         'CU': 'Cuba',
4330         'CW': 'Curaçao',
4331         'CY': 'Cyprus',
4332         'CZ': 'Czech Republic',
4333         'DK': 'Denmark',
4334         'DJ': 'Djibouti',
4335         'DM': 'Dominica',
4336         'DO': 'Dominican Republic',
4337         'EC': 'Ecuador',
4338         'EG': 'Egypt',
4339         'SV': 'El Salvador',
4340         'GQ': 'Equatorial Guinea',
4341         'ER': 'Eritrea',
4342         'EE': 'Estonia',
4343         'ET': 'Ethiopia',
4344         'FK': 'Falkland Islands (Malvinas)',
4345         'FO': 'Faroe Islands',
4346         'FJ': 'Fiji',
4347         'FI': 'Finland',
4348         'FR': 'France',
4349         'GF': 'French Guiana',
4350         'PF': 'French Polynesia',
4351         'TF': 'French Southern Territories',
4352         'GA': 'Gabon',
4353         'GM': 'Gambia',
4354         'GE': 'Georgia',
4355         'DE': 'Germany',
4356         'GH': 'Ghana',
4357         'GI': 'Gibraltar',
4358         'GR': 'Greece',
4359         'GL': 'Greenland',
4360         'GD': 'Grenada',
4361         'GP': 'Guadeloupe',
4362         'GU': 'Guam',
4363         'GT': 'Guatemala',
4364         'GG': 'Guernsey',
4365         'GN': 'Guinea',
4366         'GW': 'Guinea-Bissau',
4367         'GY': 'Guyana',
4368         'HT': 'Haiti',
4369         'HM': 'Heard Island and McDonald Islands',
4370         'VA': 'Holy See (Vatican City State)',
4371         'HN': 'Honduras',
4372         'HK': 'Hong Kong',
4373         'HU': 'Hungary',
4374         'IS': 'Iceland',
4375         'IN': 'India',
4376         'ID': 'Indonesia',
4377         'IR': 'Iran, Islamic Republic of',
4378         'IQ': 'Iraq',
4379         'IE': 'Ireland',
4380         'IM': 'Isle of Man',
4381         'IL': 'Israel',
4382         'IT': 'Italy',
4383         'JM': 'Jamaica',
4384         'JP': 'Japan',
4385         'JE': 'Jersey',
4386         'JO': 'Jordan',
4387         'KZ': 'Kazakhstan',
4388         'KE': 'Kenya',
4389         'KI': 'Kiribati',
4390         'KP': 'Korea, Democratic People\'s Republic of',
4391         'KR': 'Korea, Republic of',
4392         'KW': 'Kuwait',
4393         'KG': 'Kyrgyzstan',
4394         'LA': 'Lao People\'s Democratic Republic',
4395         'LV': 'Latvia',
4396         'LB': 'Lebanon',
4397         'LS': 'Lesotho',
4398         'LR': 'Liberia',
4399         'LY': 'Libya',
4400         'LI': 'Liechtenstein',
4401         'LT': 'Lithuania',
4402         'LU': 'Luxembourg',
4403         'MO': 'Macao',
4404         'MK': 'Macedonia, the Former Yugoslav Republic of',
4405         'MG': 'Madagascar',
4406         'MW': 'Malawi',
4407         'MY': 'Malaysia',
4408         'MV': 'Maldives',
4409         'ML': 'Mali',
4410         'MT': 'Malta',
4411         'MH': 'Marshall Islands',
4412         'MQ': 'Martinique',
4413         'MR': 'Mauritania',
4414         'MU': 'Mauritius',
4415         'YT': 'Mayotte',
4416         'MX': 'Mexico',
4417         'FM': 'Micronesia, Federated States of',
4418         'MD': 'Moldova, Republic of',
4419         'MC': 'Monaco',
4420         'MN': 'Mongolia',
4421         'ME': 'Montenegro',
4422         'MS': 'Montserrat',
4423         'MA': 'Morocco',
4424         'MZ': 'Mozambique',
4425         'MM': 'Myanmar',
4426         'NA': 'Namibia',
4427         'NR': 'Nauru',
4428         'NP': 'Nepal',
4429         'NL': 'Netherlands',
4430         'NC': 'New Caledonia',
4431         'NZ': 'New Zealand',
4432         'NI': 'Nicaragua',
4433         'NE': 'Niger',
4434         'NG': 'Nigeria',
4435         'NU': 'Niue',
4436         'NF': 'Norfolk Island',
4437         'MP': 'Northern Mariana Islands',
4438         'NO': 'Norway',
4439         'OM': 'Oman',
4440         'PK': 'Pakistan',
4441         'PW': 'Palau',
4442         'PS': 'Palestine, State of',
4443         'PA': 'Panama',
4444         'PG': 'Papua New Guinea',
4445         'PY': 'Paraguay',
4446         'PE': 'Peru',
4447         'PH': 'Philippines',
4448         'PN': 'Pitcairn',
4449         'PL': 'Poland',
4450         'PT': 'Portugal',
4451         'PR': 'Puerto Rico',
4452         'QA': 'Qatar',
4453         'RE': 'Réunion',
4454         'RO': 'Romania',
4455         'RU': 'Russian Federation',
4456         'RW': 'Rwanda',
4457         'BL': 'Saint Barthélemy',
4458         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4459         'KN': 'Saint Kitts and Nevis',
4460         'LC': 'Saint Lucia',
4461         'MF': 'Saint Martin (French part)',
4462         'PM': 'Saint Pierre and Miquelon',
4463         'VC': 'Saint Vincent and the Grenadines',
4464         'WS': 'Samoa',
4465         'SM': 'San Marino',
4466         'ST': 'Sao Tome and Principe',
4467         'SA': 'Saudi Arabia',
4468         'SN': 'Senegal',
4469         'RS': 'Serbia',
4470         'SC': 'Seychelles',
4471         'SL': 'Sierra Leone',
4472         'SG': 'Singapore',
4473         'SX': 'Sint Maarten (Dutch part)',
4474         'SK': 'Slovakia',
4475         'SI': 'Slovenia',
4476         'SB': 'Solomon Islands',
4477         'SO': 'Somalia',
4478         'ZA': 'South Africa',
4479         'GS': 'South Georgia and the South Sandwich Islands',
4480         'SS': 'South Sudan',
4481         'ES': 'Spain',
4482         'LK': 'Sri Lanka',
4483         'SD': 'Sudan',
4484         'SR': 'Suriname',
4485         'SJ': 'Svalbard and Jan Mayen',
4486         'SZ': 'Swaziland',
4487         'SE': 'Sweden',
4488         'CH': 'Switzerland',
4489         'SY': 'Syrian Arab Republic',
4490         'TW': 'Taiwan, Province of China',
4491         'TJ': 'Tajikistan',
4492         'TZ': 'Tanzania, United Republic of',
4493         'TH': 'Thailand',
4494         'TL': 'Timor-Leste',
4495         'TG': 'Togo',
4496         'TK': 'Tokelau',
4497         'TO': 'Tonga',
4498         'TT': 'Trinidad and Tobago',
4499         'TN': 'Tunisia',
4500         'TR': 'Turkey',
4501         'TM': 'Turkmenistan',
4502         'TC': 'Turks and Caicos Islands',
4503         'TV': 'Tuvalu',
4504         'UG': 'Uganda',
4505         'UA': 'Ukraine',
4506         'AE': 'United Arab Emirates',
4507         'GB': 'United Kingdom',
4508         'US': 'United States',
4509         'UM': 'United States Minor Outlying Islands',
4510         'UY': 'Uruguay',
4511         'UZ': 'Uzbekistan',
4512         'VU': 'Vanuatu',
4513         'VE': 'Venezuela, Bolivarian Republic of',
4514         'VN': 'Viet Nam',
4515         'VG': 'Virgin Islands, British',
4516         'VI': 'Virgin Islands, U.S.',
4517         'WF': 'Wallis and Futuna',
4518         'EH': 'Western Sahara',
4519         'YE': 'Yemen',
4520         'ZM': 'Zambia',
4521         'ZW': 'Zimbabwe',
4522         # Not ISO 3166 codes, but used for IP blocks
4523         'AP': 'Asia/Pacific Region',
4524         'EU': 'Europe',
4525     }
4526
4527     @classmethod
4528     def short2full(cls, code):
4529         """Convert an ISO 3166-2 country code to the corresponding full name"""
4530         return cls._country_map.get(code.upper())
4531
4532
4533 class GeoUtils:
4534     # Major IPv4 address blocks per country
4535     _country_ip_map = {
4536         'AD': '46.172.224.0/19',
4537         'AE': '94.200.0.0/13',
4538         'AF': '149.54.0.0/17',
4539         'AG': '209.59.64.0/18',
4540         'AI': '204.14.248.0/21',
4541         'AL': '46.99.0.0/16',
4542         'AM': '46.70.0.0/15',
4543         'AO': '105.168.0.0/13',
4544         'AP': '182.50.184.0/21',
4545         'AQ': '23.154.160.0/24',
4546         'AR': '181.0.0.0/12',
4547         'AS': '202.70.112.0/20',
4548         'AT': '77.116.0.0/14',
4549         'AU': '1.128.0.0/11',
4550         'AW': '181.41.0.0/18',
4551         'AX': '185.217.4.0/22',
4552         'AZ': '5.197.0.0/16',
4553         'BA': '31.176.128.0/17',
4554         'BB': '65.48.128.0/17',
4555         'BD': '114.130.0.0/16',
4556         'BE': '57.0.0.0/8',
4557         'BF': '102.178.0.0/15',
4558         'BG': '95.42.0.0/15',
4559         'BH': '37.131.0.0/17',
4560         'BI': '154.117.192.0/18',
4561         'BJ': '137.255.0.0/16',
4562         'BL': '185.212.72.0/23',
4563         'BM': '196.12.64.0/18',
4564         'BN': '156.31.0.0/16',
4565         'BO': '161.56.0.0/16',
4566         'BQ': '161.0.80.0/20',
4567         'BR': '191.128.0.0/12',
4568         'BS': '24.51.64.0/18',
4569         'BT': '119.2.96.0/19',
4570         'BW': '168.167.0.0/16',
4571         'BY': '178.120.0.0/13',
4572         'BZ': '179.42.192.0/18',
4573         'CA': '99.224.0.0/11',
4574         'CD': '41.243.0.0/16',
4575         'CF': '197.242.176.0/21',
4576         'CG': '160.113.0.0/16',
4577         'CH': '85.0.0.0/13',
4578         'CI': '102.136.0.0/14',
4579         'CK': '202.65.32.0/19',
4580         'CL': '152.172.0.0/14',
4581         'CM': '102.244.0.0/14',
4582         'CN': '36.128.0.0/10',
4583         'CO': '181.240.0.0/12',
4584         'CR': '201.192.0.0/12',
4585         'CU': '152.206.0.0/15',
4586         'CV': '165.90.96.0/19',
4587         'CW': '190.88.128.0/17',
4588         'CY': '31.153.0.0/16',
4589         'CZ': '88.100.0.0/14',
4590         'DE': '53.0.0.0/8',
4591         'DJ': '197.241.0.0/17',
4592         'DK': '87.48.0.0/12',
4593         'DM': '192.243.48.0/20',
4594         'DO': '152.166.0.0/15',
4595         'DZ': '41.96.0.0/12',
4596         'EC': '186.68.0.0/15',
4597         'EE': '90.190.0.0/15',
4598         'EG': '156.160.0.0/11',
4599         'ER': '196.200.96.0/20',
4600         'ES': '88.0.0.0/11',
4601         'ET': '196.188.0.0/14',
4602         'EU': '2.16.0.0/13',
4603         'FI': '91.152.0.0/13',
4604         'FJ': '144.120.0.0/16',
4605         'FK': '80.73.208.0/21',
4606         'FM': '119.252.112.0/20',
4607         'FO': '88.85.32.0/19',
4608         'FR': '90.0.0.0/9',
4609         'GA': '41.158.0.0/15',
4610         'GB': '25.0.0.0/8',
4611         'GD': '74.122.88.0/21',
4612         'GE': '31.146.0.0/16',
4613         'GF': '161.22.64.0/18',
4614         'GG': '62.68.160.0/19',
4615         'GH': '154.160.0.0/12',
4616         'GI': '95.164.0.0/16',
4617         'GL': '88.83.0.0/19',
4618         'GM': '160.182.0.0/15',
4619         'GN': '197.149.192.0/18',
4620         'GP': '104.250.0.0/19',
4621         'GQ': '105.235.224.0/20',
4622         'GR': '94.64.0.0/13',
4623         'GT': '168.234.0.0/16',
4624         'GU': '168.123.0.0/16',
4625         'GW': '197.214.80.0/20',
4626         'GY': '181.41.64.0/18',
4627         'HK': '113.252.0.0/14',
4628         'HN': '181.210.0.0/16',
4629         'HR': '93.136.0.0/13',
4630         'HT': '148.102.128.0/17',
4631         'HU': '84.0.0.0/14',
4632         'ID': '39.192.0.0/10',
4633         'IE': '87.32.0.0/12',
4634         'IL': '79.176.0.0/13',
4635         'IM': '5.62.80.0/20',
4636         'IN': '117.192.0.0/10',
4637         'IO': '203.83.48.0/21',
4638         'IQ': '37.236.0.0/14',
4639         'IR': '2.176.0.0/12',
4640         'IS': '82.221.0.0/16',
4641         'IT': '79.0.0.0/10',
4642         'JE': '87.244.64.0/18',
4643         'JM': '72.27.0.0/17',
4644         'JO': '176.29.0.0/16',
4645         'JP': '133.0.0.0/8',
4646         'KE': '105.48.0.0/12',
4647         'KG': '158.181.128.0/17',
4648         'KH': '36.37.128.0/17',
4649         'KI': '103.25.140.0/22',
4650         'KM': '197.255.224.0/20',
4651         'KN': '198.167.192.0/19',
4652         'KP': '175.45.176.0/22',
4653         'KR': '175.192.0.0/10',
4654         'KW': '37.36.0.0/14',
4655         'KY': '64.96.0.0/15',
4656         'KZ': '2.72.0.0/13',
4657         'LA': '115.84.64.0/18',
4658         'LB': '178.135.0.0/16',
4659         'LC': '24.92.144.0/20',
4660         'LI': '82.117.0.0/19',
4661         'LK': '112.134.0.0/15',
4662         'LR': '102.183.0.0/16',
4663         'LS': '129.232.0.0/17',
4664         'LT': '78.56.0.0/13',
4665         'LU': '188.42.0.0/16',
4666         'LV': '46.109.0.0/16',
4667         'LY': '41.252.0.0/14',
4668         'MA': '105.128.0.0/11',
4669         'MC': '88.209.64.0/18',
4670         'MD': '37.246.0.0/16',
4671         'ME': '178.175.0.0/17',
4672         'MF': '74.112.232.0/21',
4673         'MG': '154.126.0.0/17',
4674         'MH': '117.103.88.0/21',
4675         'MK': '77.28.0.0/15',
4676         'ML': '154.118.128.0/18',
4677         'MM': '37.111.0.0/17',
4678         'MN': '49.0.128.0/17',
4679         'MO': '60.246.0.0/16',
4680         'MP': '202.88.64.0/20',
4681         'MQ': '109.203.224.0/19',
4682         'MR': '41.188.64.0/18',
4683         'MS': '208.90.112.0/22',
4684         'MT': '46.11.0.0/16',
4685         'MU': '105.16.0.0/12',
4686         'MV': '27.114.128.0/18',
4687         'MW': '102.70.0.0/15',
4688         'MX': '187.192.0.0/11',
4689         'MY': '175.136.0.0/13',
4690         'MZ': '197.218.0.0/15',
4691         'NA': '41.182.0.0/16',
4692         'NC': '101.101.0.0/18',
4693         'NE': '197.214.0.0/18',
4694         'NF': '203.17.240.0/22',
4695         'NG': '105.112.0.0/12',
4696         'NI': '186.76.0.0/15',
4697         'NL': '145.96.0.0/11',
4698         'NO': '84.208.0.0/13',
4699         'NP': '36.252.0.0/15',
4700         'NR': '203.98.224.0/19',
4701         'NU': '49.156.48.0/22',
4702         'NZ': '49.224.0.0/14',
4703         'OM': '5.36.0.0/15',
4704         'PA': '186.72.0.0/15',
4705         'PE': '186.160.0.0/14',
4706         'PF': '123.50.64.0/18',
4707         'PG': '124.240.192.0/19',
4708         'PH': '49.144.0.0/13',
4709         'PK': '39.32.0.0/11',
4710         'PL': '83.0.0.0/11',
4711         'PM': '70.36.0.0/20',
4712         'PR': '66.50.0.0/16',
4713         'PS': '188.161.0.0/16',
4714         'PT': '85.240.0.0/13',
4715         'PW': '202.124.224.0/20',
4716         'PY': '181.120.0.0/14',
4717         'QA': '37.210.0.0/15',
4718         'RE': '102.35.0.0/16',
4719         'RO': '79.112.0.0/13',
4720         'RS': '93.86.0.0/15',
4721         'RU': '5.136.0.0/13',
4722         'RW': '41.186.0.0/16',
4723         'SA': '188.48.0.0/13',
4724         'SB': '202.1.160.0/19',
4725         'SC': '154.192.0.0/11',
4726         'SD': '102.120.0.0/13',
4727         'SE': '78.64.0.0/12',
4728         'SG': '8.128.0.0/10',
4729         'SI': '188.196.0.0/14',
4730         'SK': '78.98.0.0/15',
4731         'SL': '102.143.0.0/17',
4732         'SM': '89.186.32.0/19',
4733         'SN': '41.82.0.0/15',
4734         'SO': '154.115.192.0/18',
4735         'SR': '186.179.128.0/17',
4736         'SS': '105.235.208.0/21',
4737         'ST': '197.159.160.0/19',
4738         'SV': '168.243.0.0/16',
4739         'SX': '190.102.0.0/20',
4740         'SY': '5.0.0.0/16',
4741         'SZ': '41.84.224.0/19',
4742         'TC': '65.255.48.0/20',
4743         'TD': '154.68.128.0/19',
4744         'TG': '196.168.0.0/14',
4745         'TH': '171.96.0.0/13',
4746         'TJ': '85.9.128.0/18',
4747         'TK': '27.96.24.0/21',
4748         'TL': '180.189.160.0/20',
4749         'TM': '95.85.96.0/19',
4750         'TN': '197.0.0.0/11',
4751         'TO': '175.176.144.0/21',
4752         'TR': '78.160.0.0/11',
4753         'TT': '186.44.0.0/15',
4754         'TV': '202.2.96.0/19',
4755         'TW': '120.96.0.0/11',
4756         'TZ': '156.156.0.0/14',
4757         'UA': '37.52.0.0/14',
4758         'UG': '102.80.0.0/13',
4759         'US': '6.0.0.0/8',
4760         'UY': '167.56.0.0/13',
4761         'UZ': '84.54.64.0/18',
4762         'VA': '212.77.0.0/19',
4763         'VC': '207.191.240.0/21',
4764         'VE': '186.88.0.0/13',
4765         'VG': '66.81.192.0/20',
4766         'VI': '146.226.0.0/16',
4767         'VN': '14.160.0.0/11',
4768         'VU': '202.80.32.0/20',
4769         'WF': '117.20.32.0/21',
4770         'WS': '202.4.32.0/19',
4771         'YE': '134.35.0.0/16',
4772         'YT': '41.242.116.0/22',
4773         'ZA': '41.0.0.0/11',
4774         'ZM': '102.144.0.0/13',
4775         'ZW': '102.177.192.0/18',
4776     }
4777
4778     @classmethod
4779     def random_ipv4(cls, code_or_block):
4780         if len(code_or_block) == 2:
4781             block = cls._country_ip_map.get(code_or_block.upper())
4782             if not block:
4783                 return None
4784         else:
4785             block = code_or_block
4786         addr, preflen = block.split('/')
4787         addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4788         addr_max = addr_min | (0xffffffff >> int(preflen))
4789         return str(socket.inet_ntoa(
4790             struct.pack('!L', random.randint(addr_min, addr_max))))
4791
4792
4793 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4794     def __init__(self, proxies=None):
4795         # Set default handlers
4796         for type in ('http', 'https'):
4797             setattr(self, '%s_open' % type,
4798                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4799                         meth(r, proxy, type))
4800         urllib.request.ProxyHandler.__init__(self, proxies)
4801
4802     def proxy_open(self, req, proxy, type):
4803         req_proxy = req.headers.get('Ytdl-request-proxy')
4804         if req_proxy is not None:
4805             proxy = req_proxy
4806             del req.headers['Ytdl-request-proxy']
4807
4808         if proxy == '__noproxy__':
4809             return None  # No Proxy
4810         if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4811             req.add_header('Ytdl-socks-proxy', proxy)
4812             # yt-dlp's http/https handlers do wrapping the socket with socks
4813             return None
4814         return urllib.request.ProxyHandler.proxy_open(
4815             self, req, proxy, type)
4816
4817
4818 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4819 # released into Public Domain
4820 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4821
4822 def long_to_bytes(n, blocksize=0):
4823     """long_to_bytes(n:long, blocksize:int) : string
4824     Convert a long integer to a byte string.
4825
4826     If optional blocksize is given and greater than zero, pad the front of the
4827     byte string with binary zeros so that the length is a multiple of
4828     blocksize.
4829     """
4830     # after much testing, this algorithm was deemed to be the fastest
4831     s = b''
4832     n = int(n)
4833     while n > 0:
4834         s = struct.pack('>I', n & 0xffffffff) + s
4835         n = n >> 32
4836     # strip off leading zeros
4837     for i in range(len(s)):
4838         if s[i] != b'\000'[0]:
4839             break
4840     else:
4841         # only happens when n == 0
4842         s = b'\000'
4843         i = 0
4844     s = s[i:]
4845     # add back some pad bytes.  this could be done more efficiently w.r.t. the
4846     # de-padding being done above, but sigh...
4847     if blocksize > 0 and len(s) % blocksize:
4848         s = (blocksize - len(s) % blocksize) * b'\000' + s
4849     return s
4850
4851
4852 def bytes_to_long(s):
4853     """bytes_to_long(string) : long
4854     Convert a byte string to a long integer.
4855
4856     This is (essentially) the inverse of long_to_bytes().
4857     """
4858     acc = 0
4859     length = len(s)
4860     if length % 4:
4861         extra = (4 - length % 4)
4862         s = b'\000' * extra + s
4863         length = length + extra
4864     for i in range(0, length, 4):
4865         acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4866     return acc
4867
4868
4869 def ohdave_rsa_encrypt(data, exponent, modulus):
4870     '''
4871     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4872
4873     Input:
4874         data: data to encrypt, bytes-like object
4875         exponent, modulus: parameter e and N of RSA algorithm, both integer
4876     Output: hex string of encrypted data
4877
4878     Limitation: supports one block encryption only
4879     '''
4880
4881     payload = int(binascii.hexlify(data[::-1]), 16)
4882     encrypted = pow(payload, exponent, modulus)
4883     return '%x' % encrypted
4884
4885
4886 def pkcs1pad(data, length):
4887     """
4888     Padding input data with PKCS#1 scheme
4889
4890     @param {int[]} data        input data
4891     @param {int}   length      target length
4892     @returns {int[]}           padded data
4893     """
4894     if len(data) > length - 11:
4895         raise ValueError('Input data too long for PKCS#1 padding')
4896
4897     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
4898     return [0, 2] + pseudo_random + [0] + data
4899
4900
4901 def _base_n_table(n, table):
4902     if not table and not n:
4903         raise ValueError('Either table or n must be specified')
4904     table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4905
4906     if n and n != len(table):
4907         raise ValueError(f'base {n} exceeds table length {len(table)}')
4908     return table
4909
4910
4911 def encode_base_n(num, n=None, table=None):
4912     """Convert given int to a base-n string"""
4913     table = _base_n_table(n, table)
4914     if not num:
4915         return table[0]
4916
4917     result, base = '', len(table)
4918     while num:
4919         result = table[num % base] + result
4920         num = num // base
4921     return result
4922
4923
4924 def decode_base_n(string, n=None, table=None):
4925     """Convert given base-n string to int"""
4926     table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4927     result, base = 0, len(table)
4928     for char in string:
4929         result = result * base + table[char]
4930     return result
4931
4932
4933 def decode_base(value, digits):
4934     deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
4935                         f'in a future version. Use {__name__}.decode_base_n instead')
4936     return decode_base_n(value, table=digits)
4937
4938
4939 def decode_packed_codes(code):
4940     mobj = re.search(PACKED_CODES_RE, code)
4941     obfuscated_code, base, count, symbols = mobj.groups()
4942     base = int(base)
4943     count = int(count)
4944     symbols = symbols.split('|')
4945     symbol_table = {}
4946
4947     while count:
4948         count -= 1
4949         base_n_count = encode_base_n(count, base)
4950         symbol_table[base_n_count] = symbols[count] or base_n_count
4951
4952     return re.sub(
4953         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4954         obfuscated_code)
4955
4956
4957 def caesar(s, alphabet, shift):
4958     if shift == 0:
4959         return s
4960     l = len(alphabet)
4961     return ''.join(
4962         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4963         for c in s)
4964
4965
4966 def rot47(s):
4967     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4968
4969
4970 def parse_m3u8_attributes(attrib):
4971     info = {}
4972     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4973         if val.startswith('"'):
4974             val = val[1:-1]
4975         info[key] = val
4976     return info
4977
4978
4979 def urshift(val, n):
4980     return val >> n if val >= 0 else (val + 0x100000000) >> n
4981
4982
4983 # Based on png2str() written by @gdkchan and improved by @yokrysty
4984 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4985 def decode_png(png_data):
4986     # Reference: https://www.w3.org/TR/PNG/
4987     header = png_data[8:]
4988
4989     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4990         raise OSError('Not a valid PNG file.')
4991
4992     int_map = {1: '>B', 2: '>H', 4: '>I'}
4993     unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
4994
4995     chunks = []
4996
4997     while header:
4998         length = unpack_integer(header[:4])
4999         header = header[4:]
5000
5001         chunk_type = header[:4]
5002         header = header[4:]
5003
5004         chunk_data = header[:length]
5005         header = header[length:]
5006
5007         header = header[4:]  # Skip CRC
5008
5009         chunks.append({
5010             'type': chunk_type,
5011             'length': length,
5012             'data': chunk_data
5013         })
5014
5015     ihdr = chunks[0]['data']
5016
5017     width = unpack_integer(ihdr[:4])
5018     height = unpack_integer(ihdr[4:8])
5019
5020     idat = b''
5021
5022     for chunk in chunks:
5023         if chunk['type'] == b'IDAT':
5024             idat += chunk['data']
5025
5026     if not idat:
5027         raise OSError('Unable to read PNG data.')
5028
5029     decompressed_data = bytearray(zlib.decompress(idat))
5030
5031     stride = width * 3
5032     pixels = []
5033
5034     def _get_pixel(idx):
5035         x = idx % stride
5036         y = idx // stride
5037         return pixels[y][x]
5038
5039     for y in range(height):
5040         basePos = y * (1 + stride)
5041         filter_type = decompressed_data[basePos]
5042
5043         current_row = []
5044
5045         pixels.append(current_row)
5046
5047         for x in range(stride):
5048             color = decompressed_data[1 + basePos + x]
5049             basex = y * stride + x
5050             left = 0
5051             up = 0
5052
5053             if x > 2:
5054                 left = _get_pixel(basex - 3)
5055             if y > 0:
5056                 up = _get_pixel(basex - stride)
5057
5058             if filter_type == 1:  # Sub
5059                 color = (color + left) & 0xff
5060             elif filter_type == 2:  # Up
5061                 color = (color + up) & 0xff
5062             elif filter_type == 3:  # Average
5063                 color = (color + ((left + up) >> 1)) & 0xff
5064             elif filter_type == 4:  # Paeth
5065                 a = left
5066                 b = up
5067                 c = 0
5068
5069                 if x > 2 and y > 0:
5070                     c = _get_pixel(basex - stride - 3)
5071
5072                 p = a + b - c
5073
5074                 pa = abs(p - a)
5075                 pb = abs(p - b)
5076                 pc = abs(p - c)
5077
5078                 if pa <= pb and pa <= pc:
5079                     color = (color + a) & 0xff
5080                 elif pb <= pc:
5081                     color = (color + b) & 0xff
5082                 else:
5083                     color = (color + c) & 0xff
5084
5085             current_row.append(color)
5086
5087     return width, height, pixels
5088
5089
5090 def write_xattr(path, key, value):
5091     # Windows: Write xattrs to NTFS Alternate Data Streams:
5092     # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5093     if compat_os_name == 'nt':
5094         assert ':' not in key
5095         assert os.path.exists(path)
5096
5097         try:
5098             with open(f'{path}:{key}', 'wb') as f:
5099                 f.write(value)
5100         except OSError as e:
5101             raise XAttrMetadataError(e.errno, e.strerror)
5102         return
5103
5104     # UNIX Method 1. Use xattrs/pyxattrs modules
5105
5106     setxattr = None
5107     if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5108         # Unicode arguments are not supported in pyxattr until version 0.5.0
5109         # See https://github.com/ytdl-org/youtube-dl/issues/5498
5110         if version_tuple(xattr.__version__) >= (0, 5, 0):
5111             setxattr = xattr.set
5112     elif xattr:
5113         setxattr = xattr.setxattr
5114
5115     if setxattr:
5116         try:
5117             setxattr(path, key, value)
5118         except OSError as e:
5119             raise XAttrMetadataError(e.errno, e.strerror)
5120         return
5121
5122     # UNIX Method 2. Use setfattr/xattr executables
5123     exe = ('setfattr' if check_executable('setfattr', ['--version'])
5124            else 'xattr' if check_executable('xattr', ['-h']) else None)
5125     if not exe:
5126         raise XAttrUnavailableError(
5127             'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5128             + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5129
5130     value = value.decode()
5131     try:
5132         _, stderr, returncode = Popen.run(
5133             [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5134             text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5135     except OSError as e:
5136         raise XAttrMetadataError(e.errno, e.strerror)
5137     if returncode:
5138         raise XAttrMetadataError(returncode, stderr)
5139
5140
5141 def random_birthday(year_field, month_field, day_field):
5142     start_date = datetime.date(1950, 1, 1)
5143     end_date = datetime.date(1995, 12, 31)
5144     offset = random.randint(0, (end_date - start_date).days)
5145     random_date = start_date + datetime.timedelta(offset)
5146     return {
5147         year_field: str(random_date.year),
5148         month_field: str(random_date.month),
5149         day_field: str(random_date.day),
5150     }
5151
5152
5153 # Templates for internet shortcut files, which are plain text files.
5154 DOT_URL_LINK_TEMPLATE = '''\
5155 [InternetShortcut]
5156 URL=%(url)s
5157 '''
5158
5159 DOT_WEBLOC_LINK_TEMPLATE = '''\
5160 <?xml version="1.0" encoding="UTF-8"?>
5161 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5162 <plist version="1.0">
5163 <dict>
5164 \t<key>URL</key>
5165 \t<string>%(url)s</string>
5166 </dict>
5167 </plist>
5168 '''
5169
5170 DOT_DESKTOP_LINK_TEMPLATE = '''\
5171 [Desktop Entry]
5172 Encoding=UTF-8
5173 Name=%(filename)s
5174 Type=Link
5175 URL=%(url)s
5176 Icon=text-html
5177 '''
5178
5179 LINK_TEMPLATES = {
5180     'url': DOT_URL_LINK_TEMPLATE,
5181     'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5182     'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5183 }
5184
5185
5186 def iri_to_uri(iri):
5187     """
5188     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5189
5190     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5191     """
5192
5193     iri_parts = urllib.parse.urlparse(iri)
5194
5195     if '[' in iri_parts.netloc:
5196         raise ValueError('IPv6 URIs are not, yet, supported.')
5197         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5198
5199     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5200
5201     net_location = ''
5202     if iri_parts.username:
5203         net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5204         if iri_parts.password is not None:
5205             net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5206         net_location += '@'
5207
5208     net_location += iri_parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
5209     # The 'idna' encoding produces ASCII text.
5210     if iri_parts.port is not None and iri_parts.port != 80:
5211         net_location += ':' + str(iri_parts.port)
5212
5213     return urllib.parse.urlunparse(
5214         (iri_parts.scheme,
5215             net_location,
5216
5217             urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5218
5219             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5220             urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5221
5222             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5223             urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5224
5225             urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5226
5227     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5228
5229
5230 def to_high_limit_path(path):
5231     if sys.platform in ['win32', 'cygwin']:
5232         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5233         return '\\\\?\\' + os.path.abspath(path)
5234
5235     return path
5236
5237
5238 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5239     val = traverse_obj(obj, *variadic(field))
5240     if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5241         return default
5242     return template % func(val)
5243
5244
5245 def clean_podcast_url(url):
5246     return re.sub(r'''(?x)
5247         (?:
5248             (?:
5249                 chtbl\.com/track|
5250                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5251                 play\.podtrac\.com
5252             )/[^/]+|
5253             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5254             flex\.acast\.com|
5255             pd(?:
5256                 cn\.co| # https://podcorn.com/analytics-prefix/
5257                 st\.fm # https://podsights.com/docs/
5258             )/e
5259         )/''', '', url)
5260
5261
5262 _HEX_TABLE = '0123456789abcdef'
5263
5264
5265 def random_uuidv4():
5266     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5267
5268
5269 def make_dir(path, to_screen=None):
5270     try:
5271         dn = os.path.dirname(path)
5272         if dn and not os.path.exists(dn):
5273             os.makedirs(dn)
5274         return True
5275     except OSError as err:
5276         if callable(to_screen) is not None:
5277             to_screen('unable to create directory ' + error_to_compat_str(err))
5278         return False
5279
5280
5281 def get_executable_path():
5282     from .update import _get_variant_and_executable_path
5283
5284     return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5285
5286
5287 def load_plugins(name, suffix, namespace):
5288     classes = {}
5289     with contextlib.suppress(FileNotFoundError):
5290         plugins_spec = importlib.util.spec_from_file_location(
5291             name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5292         plugins = importlib.util.module_from_spec(plugins_spec)
5293         sys.modules[plugins_spec.name] = plugins
5294         plugins_spec.loader.exec_module(plugins)
5295         for name in dir(plugins):
5296             if name in namespace:
5297                 continue
5298             if not name.endswith(suffix):
5299                 continue
5300             klass = getattr(plugins, name)
5301             classes[name] = namespace[name] = klass
5302     return classes
5303
5304
5305 def traverse_obj(
5306         obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
5307         casesense=True, is_user_input=False, traverse_string=False):
5308     """
5309     Safely traverse nested `dict`s and `Sequence`s
5310
5311     >>> obj = [{}, {"key": "value"}]
5312     >>> traverse_obj(obj, (1, "key"))
5313     "value"
5314
5315     Each of the provided `paths` is tested and the first producing a valid result will be returned.
5316     The next path will also be tested if the path branched but no results could be found.
5317     Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
5318     A value of None is treated as the absence of a value.
5319
5320     The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
5321
5322     The keys in the path can be one of:
5323         - `None`:           Return the current object.
5324         - `str`/`int`:      Return `obj[key]`. For `re.Match, return `obj.group(key)`.
5325         - `slice`:          Branch out and return all values in `obj[key]`.
5326         - `Ellipsis`:       Branch out and return a list of all values.
5327         - `tuple`/`list`:   Branch out and return a list of all matching values.
5328                             Read as: `[traverse_obj(obj, branch) for branch in branches]`.
5329         - `function`:       Branch out and return values filtered by the function.
5330                             Read as: `[value for key, value in obj if function(key, value)]`.
5331                             For `Sequence`s, `key` is the index of the value.
5332         - `dict`            Transform the current object and return a matching dict.
5333                             Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
5334
5335         `tuple`, `list`, and `dict` all support nested paths and branches.
5336
5337     @params paths           Paths which to traverse by.
5338     @param default          Value to return if the paths do not match.
5339     @param expected_type    If a `type`, only accept final values of this type.
5340                             If any other callable, try to call the function on each result.
5341     @param get_all          If `False`, return the first matching result, otherwise all matching ones.
5342     @param casesense        If `False`, consider string dictionary keys as case insensitive.
5343
5344     The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
5345
5346     @param is_user_input    Whether the keys are generated from user input.
5347                             If `True` strings get converted to `int`/`slice` if needed.
5348     @param traverse_string  Whether to traverse into objects as strings.
5349                             If `True`, any non-compatible object will first be
5350                             converted into a string and then traversed into.
5351
5352
5353     @returns                The result of the object traversal.
5354                             If successful, `get_all=True`, and the path branches at least once,
5355                             then a list of results is returned instead.
5356                             A list is always returned if the last path branches and no `default` is given.
5357     """
5358     is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
5359     casefold = lambda k: k.casefold() if isinstance(k, str) else k
5360
5361     if isinstance(expected_type, type):
5362         type_test = lambda val: val if isinstance(val, expected_type) else None
5363     else:
5364         type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
5365
5366     def apply_key(key, obj):
5367         if obj is None:
5368             return
5369
5370         elif key is None:
5371             yield obj
5372
5373         elif isinstance(key, (list, tuple)):
5374             for branch in key:
5375                 _, result = apply_path(obj, branch)
5376                 yield from result
5377
5378         elif key is ...:
5379             if isinstance(obj, collections.abc.Mapping):
5380                 yield from obj.values()
5381             elif is_sequence(obj):
5382                 yield from obj
5383             elif isinstance(obj, re.Match):
5384                 yield from obj.groups()
5385             elif traverse_string:
5386                 yield from str(obj)
5387
5388         elif callable(key):
5389             if is_sequence(obj):
5390                 iter_obj = enumerate(obj)
5391             elif isinstance(obj, collections.abc.Mapping):
5392                 iter_obj = obj.items()
5393             elif isinstance(obj, re.Match):
5394                 iter_obj = enumerate((obj.group(), *obj.groups()))
5395             elif traverse_string:
5396                 iter_obj = enumerate(str(obj))
5397             else:
5398                 return
5399             yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))
5400
5401         elif isinstance(key, dict):
5402             iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
5403             yield {k: v if v is not None else default for k, v in iter_obj
5404                    if v is not None or default is not NO_DEFAULT}
5405
5406         elif isinstance(obj, collections.abc.Mapping):
5407             yield (obj.get(key) if casesense or (key in obj)
5408                    else next((v for k, v in obj.items() if casefold(k) == key), None))
5409
5410         elif isinstance(obj, re.Match):
5411             if isinstance(key, int) or casesense:
5412                 with contextlib.suppress(IndexError):
5413                     yield obj.group(key)
5414                     return
5415
5416             if not isinstance(key, str):
5417                 return
5418
5419             yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
5420
5421         else:
5422             if is_user_input:
5423                 key = (int_or_none(key) if ':' not in key
5424                        else slice(*map(int_or_none, key.split(':'))))
5425
5426             if not isinstance(key, (int, slice)):
5427                 return
5428
5429             if not is_sequence(obj):
5430                 if not traverse_string:
5431                     return
5432                 obj = str(obj)
5433
5434             with contextlib.suppress(IndexError):
5435                 yield obj[key]
5436
5437     def apply_path(start_obj, path):
5438         objs = (start_obj,)
5439         has_branched = False
5440
5441         for key in variadic(path):
5442             if is_user_input and key == ':':
5443                 key = ...
5444
5445             if not casesense and isinstance(key, str):
5446                 key = key.casefold()
5447
5448             if key is ... or isinstance(key, (list, tuple)) or callable(key):
5449                 has_branched = True
5450
5451             key_func = functools.partial(apply_key, key)
5452             objs = itertools.chain.from_iterable(map(key_func, objs))
5453
5454         return has_branched, objs
5455
5456     def _traverse_obj(obj, path, use_list=True):
5457         has_branched, results = apply_path(obj, path)
5458         results = LazyList(x for x in map(type_test, results) if x is not None)
5459
5460         if get_all and has_branched:
5461             return results.exhaust() if results or use_list else None
5462
5463         return results[0] if results else None
5464
5465     for index, path in enumerate(paths, 1):
5466         use_list = default is NO_DEFAULT and index == len(paths)
5467         result = _traverse_obj(obj, path, use_list)
5468         if result is not None:
5469             return result
5470
5471     return None if default is NO_DEFAULT else default
5472
5473
5474 def traverse_dict(dictn, keys, casesense=True):
5475     deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
5476                         f'in a future version. Use "{__name__}.traverse_obj" instead')
5477     return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5478
5479
5480 def get_first(obj, keys, **kwargs):
5481     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5482
5483
5484 def time_seconds(**kwargs):
5485     t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5486     return t.timestamp()
5487
5488
5489 # create a JSON Web Signature (jws) with HS256 algorithm
5490 # the resulting format is in JWS Compact Serialization
5491 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5492 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5493 def jwt_encode_hs256(payload_data, key, headers={}):
5494     header_data = {
5495         'alg': 'HS256',
5496         'typ': 'JWT',
5497     }
5498     if headers:
5499         header_data.update(headers)
5500     header_b64 = base64.b64encode(json.dumps(header_data).encode())
5501     payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5502     h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5503     signature_b64 = base64.b64encode(h.digest())
5504     token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5505     return token
5506
5507
5508 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5509 def jwt_decode_hs256(jwt):
5510     header_b64, payload_b64, signature_b64 = jwt.split('.')
5511     # add trailing ='s that may have been stripped, superfluous ='s are ignored
5512     payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
5513     return payload_data
5514
5515
5516 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5517
5518
5519 @functools.cache
5520 def supports_terminal_sequences(stream):
5521     if compat_os_name == 'nt':
5522         if not WINDOWS_VT_MODE:
5523             return False
5524     elif not os.getenv('TERM'):
5525         return False
5526     try:
5527         return stream.isatty()
5528     except BaseException:
5529         return False
5530
5531
5532 def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
5533     if get_windows_version() < (10, 0, 10586):
5534         return
5535     global WINDOWS_VT_MODE
5536     try:
5537         Popen.run('', shell=True)
5538     except Exception:
5539         return
5540
5541     WINDOWS_VT_MODE = True
5542     supports_terminal_sequences.cache_clear()
5543
5544
5545 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5546
5547
5548 def remove_terminal_sequences(string):
5549     return _terminal_sequences_re.sub('', string)
5550
5551
5552 def number_of_digits(number):
5553     return len('%d' % number)
5554
5555
5556 def join_nonempty(*values, delim='-', from_dict=None):
5557     if from_dict is not None:
5558         values = (traverse_obj(from_dict, variadic(v)) for v in values)
5559     return delim.join(map(str, filter(None, values)))
5560
5561
5562 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5563     """
5564     Find the largest format dimensions in terms of video width and, for each thumbnail:
5565     * Modify the URL: Match the width with the provided regex and replace with the former width
5566     * Update dimensions
5567
5568     This function is useful with video services that scale the provided thumbnails on demand
5569     """
5570     _keys = ('width', 'height')
5571     max_dimensions = max(
5572         (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5573         default=(0, 0))
5574     if not max_dimensions[0]:
5575         return thumbnails
5576     return [
5577         merge_dicts(
5578             {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5579             dict(zip(_keys, max_dimensions)), thumbnail)
5580         for thumbnail in thumbnails
5581     ]
5582
5583
5584 def parse_http_range(range):
5585     """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5586     if not range:
5587         return None, None, None
5588     crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5589     if not crg:
5590         return None, None, None
5591     return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5592
5593
5594 def read_stdin(what):
5595     eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5596     write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5597     return sys.stdin
5598
5599
5600 def determine_file_encoding(data):
5601     """
5602     Detect the text encoding used
5603     @returns (encoding, bytes to skip)
5604     """
5605
5606     # BOM marks are given priority over declarations
5607     for bom, enc in BOMS:
5608         if data.startswith(bom):
5609             return enc, len(bom)
5610
5611     # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5612     # We ignore the endianness to get a good enough match
5613     data = data.replace(b'\0', b'')
5614     mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5615     return mobj.group(1).decode() if mobj else None, 0
5616
5617
5618 class Config:
5619     own_args = None
5620     parsed_args = None
5621     filename = None
5622     __initialized = False
5623
5624     def __init__(self, parser, label=None):
5625         self.parser, self.label = parser, label
5626         self._loaded_paths, self.configs = set(), []
5627
5628     def init(self, args=None, filename=None):
5629         assert not self.__initialized
5630         self.own_args, self.filename = args, filename
5631         return self.load_configs()
5632
5633     def load_configs(self):
5634         directory = ''
5635         if self.filename:
5636             location = os.path.realpath(self.filename)
5637             directory = os.path.dirname(location)
5638             if location in self._loaded_paths:
5639                 return False
5640             self._loaded_paths.add(location)
5641
5642         self.__initialized = True
5643         opts, _ = self.parser.parse_known_args(self.own_args)
5644         self.parsed_args = self.own_args
5645         for location in opts.config_locations or []:
5646             if location == '-':
5647                 if location in self._loaded_paths:
5648                     continue
5649                 self._loaded_paths.add(location)
5650                 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5651                 continue
5652             location = os.path.join(directory, expand_path(location))
5653             if os.path.isdir(location):
5654                 location = os.path.join(location, 'yt-dlp.conf')
5655             if not os.path.exists(location):
5656                 self.parser.error(f'config location {location} does not exist')
5657             self.append_config(self.read_file(location), location)
5658         return True
5659
5660     def __str__(self):
5661         label = join_nonempty(
5662             self.label, 'config', f'"{self.filename}"' if self.filename else '',
5663             delim=' ')
5664         return join_nonempty(
5665             self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5666             *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5667             delim='\n')
5668
5669     @staticmethod
5670     def read_file(filename, default=[]):
5671         try:
5672             optionf = open(filename, 'rb')
5673         except OSError:
5674             return default  # silently skip if file is not present
5675         try:
5676             enc, skip = determine_file_encoding(optionf.read(512))
5677             optionf.seek(skip, io.SEEK_SET)
5678         except OSError:
5679             enc = None  # silently skip read errors
5680         try:
5681             # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5682             contents = optionf.read().decode(enc or preferredencoding())
5683             res = shlex.split(contents, comments=True)
5684         except Exception as err:
5685             raise ValueError(f'Unable to parse "{filename}": {err}')
5686         finally:
5687             optionf.close()
5688         return res
5689
5690     @staticmethod
5691     def hide_login_info(opts):
5692         PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5693         eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5694
5695         def _scrub_eq(o):
5696             m = eqre.match(o)
5697             if m:
5698                 return m.group('key') + '=PRIVATE'
5699             else:
5700                 return o
5701
5702         opts = list(map(_scrub_eq, opts))
5703         for idx, opt in enumerate(opts):
5704             if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5705                 opts[idx + 1] = 'PRIVATE'
5706         return opts
5707
5708     def append_config(self, *args, label=None):
5709         config = type(self)(self.parser, label)
5710         config._loaded_paths = self._loaded_paths
5711         if config.init(*args):
5712             self.configs.append(config)
5713
5714     @property
5715     def all_args(self):
5716         for config in reversed(self.configs):
5717             yield from config.all_args
5718         yield from self.parsed_args or []
5719
5720     def parse_known_args(self, **kwargs):
5721         return self.parser.parse_known_args(self.all_args, **kwargs)
5722
5723     def parse_args(self):
5724         return self.parser.parse_args(self.all_args)
5725
5726
5727 class WebSocketsWrapper:
5728     """Wraps websockets module to use in non-async scopes"""
5729     pool = None
5730
5731     def __init__(self, url, headers=None, connect=True):
5732         self.loop = asyncio.new_event_loop()
5733         # XXX: "loop" is deprecated
5734         self.conn = websockets.connect(
5735             url, extra_headers=headers, ping_interval=None,
5736             close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5737         if connect:
5738             self.__enter__()
5739         atexit.register(self.__exit__, None, None, None)
5740
5741     def __enter__(self):
5742         if not self.pool:
5743             self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5744         return self
5745
5746     def send(self, *args):
5747         self.run_with_loop(self.pool.send(*args), self.loop)
5748
5749     def recv(self, *args):
5750         return self.run_with_loop(self.pool.recv(*args), self.loop)
5751
5752     def __exit__(self, type, value, traceback):
5753         try:
5754             return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5755         finally:
5756             self.loop.close()
5757             self._cancel_all_tasks(self.loop)
5758
5759     # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5760     # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
5761     @staticmethod
5762     def run_with_loop(main, loop):
5763         if not asyncio.iscoroutine(main):
5764             raise ValueError(f'a coroutine was expected, got {main!r}')
5765
5766         try:
5767             return loop.run_until_complete(main)
5768         finally:
5769             loop.run_until_complete(loop.shutdown_asyncgens())
5770             if hasattr(loop, 'shutdown_default_executor'):
5771                 loop.run_until_complete(loop.shutdown_default_executor())
5772
5773     @staticmethod
5774     def _cancel_all_tasks(loop):
5775         to_cancel = asyncio.all_tasks(loop)
5776
5777         if not to_cancel:
5778             return
5779
5780         for task in to_cancel:
5781             task.cancel()
5782
5783         # XXX: "loop" is removed in python 3.10+
5784         loop.run_until_complete(
5785             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5786
5787         for task in to_cancel:
5788             if task.cancelled():
5789                 continue
5790             if task.exception() is not None:
5791                 loop.call_exception_handler({
5792                     'message': 'unhandled exception during asyncio.run() shutdown',
5793                     'exception': task.exception(),
5794                     'task': task,
5795                 })
5796
5797
5798 def merge_headers(*dicts):
5799     """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5800     return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5801
5802
5803 def cached_method(f):
5804     """Cache a method"""
5805     signature = inspect.signature(f)
5806
5807     @functools.wraps(f)
5808     def wrapper(self, *args, **kwargs):
5809         bound_args = signature.bind(self, *args, **kwargs)
5810         bound_args.apply_defaults()
5811         key = tuple(bound_args.arguments.values())[1:]
5812
5813         cache = vars(self).setdefault('__cached_method__cache', {}).setdefault(f.__name__, {})
5814         if key not in cache:
5815             cache[key] = f(self, *args, **kwargs)
5816         return cache[key]
5817     return wrapper
5818
5819
5820 class classproperty:
5821     """property access for class methods"""
5822
5823     def __init__(self, func):
5824         functools.update_wrapper(self, func)
5825         self.func = func
5826
5827     def __get__(self, _, cls):
5828         return self.func(cls)
5829
5830
5831 class Namespace(types.SimpleNamespace):
5832     """Immutable namespace"""
5833
5834     def __iter__(self):
5835         return iter(self.__dict__.values())
5836
5837     @property
5838     def items_(self):
5839         return self.__dict__.items()
5840
5841
5842 MEDIA_EXTENSIONS = Namespace(
5843     common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5844     video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5845     common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5846     audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
5847     thumbnails=('jpg', 'png', 'webp'),
5848     storyboards=('mhtml', ),
5849     subtitles=('srt', 'vtt', 'ass', 'lrc'),
5850     manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5851 )
5852 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5853 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5854
5855 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5856
5857
5858 class RetryManager:
5859     """Usage:
5860         for retry in RetryManager(...):
5861             try:
5862                 ...
5863             except SomeException as err:
5864                 retry.error = err
5865                 continue
5866     """
5867     attempt, _error = 0, None
5868
5869     def __init__(self, _retries, _error_callback, **kwargs):
5870         self.retries = _retries or 0
5871         self.error_callback = functools.partial(_error_callback, **kwargs)
5872
5873     def _should_retry(self):
5874         return self._error is not NO_DEFAULT and self.attempt <= self.retries
5875
5876     @property
5877     def error(self):
5878         if self._error is NO_DEFAULT:
5879             return None
5880         return self._error
5881
5882     @error.setter
5883     def error(self, value):
5884         self._error = value
5885
5886     def __iter__(self):
5887         while self._should_retry():
5888             self.error = NO_DEFAULT
5889             self.attempt += 1
5890             yield self
5891             if self.error:
5892                 self.error_callback(self.error, self.attempt, self.retries)
5893
5894     @staticmethod
5895     def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5896         """Utility function for reporting retries"""
5897         if count > retries:
5898             if error:
5899                 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5900             raise e
5901
5902         if not count:
5903             return warn(e)
5904         elif isinstance(e, ExtractorError):
5905             e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
5906         warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5907
5908         delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5909         if delay:
5910             info(f'Sleeping {delay:.2f} seconds ...')
5911             time.sleep(delay)
5912
5913
5914 def make_archive_id(ie, video_id):
5915     ie_key = ie if isinstance(ie, str) else ie.ie_key()
5916     return f'{ie_key.lower()} {video_id}'
5917
5918
5919 def truncate_string(s, left, right=0):
5920     assert left > 3 and right >= 0
5921     if s is None or len(s) <= left + right:
5922         return s
5923     return f'{s[:left-3]}...{s[-right:]}'
5924
5925
5926 def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
5927     assert 'all' in alias_dict, '"all" alias is required'
5928     requested = list(start or [])
5929     for val in options:
5930         discard = val.startswith('-')
5931         if discard:
5932             val = val[1:]
5933
5934         if val in alias_dict:
5935             val = alias_dict[val] if not discard else [
5936                 i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
5937             # NB: Do not allow regex in aliases for performance
5938             requested = orderedSet_from_options(val, alias_dict, start=requested)
5939             continue
5940
5941         current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
5942                    else [val] if val in alias_dict['all'] else None)
5943         if current is None:
5944             raise ValueError(val)
5945
5946         if discard:
5947             for item in current:
5948                 while item in requested:
5949                     requested.remove(item)
5950         else:
5951             requested.extend(current)
5952
5953     return orderedSet(requested)
5954
5955
5956 # Deprecated
5957 has_certifi = bool(certifi)
5958 has_websockets = bool(websockets)